Add patch for using newer ROCm versions

author: acxz 2020-09-30 10:09:59 -0400
committer: acxz 2020-09-30 10:10:56 -0400
commit: ad6bd8ba4d83cf71c450fa15b9c68559749f4739 (patch)
tree: b2c24ce0af30baf0464d46c86defc8c9cd030b88
parent: 1c2a2f45cdd06ea6442d24b14824bf1fec5aa9a8 (diff)
download: aur-ad6bd8ba4d83cf71c450fa15b9c68559749f4739.tar.gz
3 files changed, 701 insertions, 8 deletions
diff --git a/.SRCINFO b/.SRCINFO
index 30b438cc38f0..6ca94b2e930e 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,7 +1,7 @@
 pkgbase = tensorflow-rocm
 	pkgdesc = Library for computation using data flow graphs for scalable machine learning
 	pkgver = 2.3.1
-	pkgrel = 1
+	pkgrel = 2
 	url = https://www.tensorflow.org/
 	arch = x86_64
 	license = APACHE
@@ -31,7 +31,7 @@ pkgbase = tensorflow-rocm
 	source = fix_gpu_atomic_redef.patch::https://github.com/tensorflow/tensorflow/commit/c054f40f66fa625f51085a20c48554c61d05c5fd.patch
 	source = fix_ldexp_float.patch::https://github.com/tensorflow/tensorflow/commit/655ce09f679a90ecd561538227c703b42d0fc5fa.patch
 	source = fix_occupancy_block.patch
-	source = rocm-3.7.patch::https://github.com/tensorflow/tensorflow/pull/42689.patch
+	source = new-rocm.patch
 	sha512sums = e497ef4564f50abf9f918be4522cf702f4cf945cb1ebf83af1386ac4ddc7373b3ba70c7f803f8ca06faf2c6b5396e60b1e0e9b97bfbd667e733b08b6e6d70ef0
 	sha512sums = df2e0373e2f63b8766f31933f7db57f6a7559b8f03af1db51644fba87731451a7cd3895529a3192e5394612fcb42f245b794b1c9ca3c05881ca03a547c8c9acc
 	sha512sums = e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08
@@ -40,7 +40,7 @@ pkgbase = tensorflow-rocm
 	sha512sums = 75972acf0ec53b28aa6c93de77a385acaf675c0d0ae93b6545f67414e9895cbd1074a5d65b211390846b736df271a567b49ec4c992883ad83c060f708bbe0d20
 	sha512sums = 42fc09bc15412f3b9a82f36485735faed0dcc2f47d72c5bfc451bc09a2aad472db59edb387455fb6594b1606de3a7789917e1fb31280c7044898097ec37db3d5
 	sha512sums = 88c04ed7a766193687d7079102332e3c63d6f0accbda777836abe5e03e9ebb83fd1aeaa9e4adca70310ce18bf3c6c3907f1f8a11c13e67e3ef79497b91bbf126
-	sha512sums = SKIP
+	sha512sums = 080fd9d4e1228ceb04901a0caceb18b965ef199704196a9b7711fcada3a8cfc2f65c529c4c0e05960ab1e469d203727bf0bbded82d895c13e0e2ab29ae524317
 
 pkgname = tensorflow-rocm
 	pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM)
diff --git a/PKGBUILD b/PKGBUILD
index 52e2e0dd7a7f..5e6f3b4383ab 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -16,7 +16,7 @@ pkgname=()
 
 pkgver=2.3.1
 _pkgver=2.3.1
-pkgrel=1
+pkgrel=2
 pkgdesc="Library for computation using data flow graphs for scalable machine learning"
 url="https://www.tensorflow.org/"
 license=('APACHE')
@@ -34,7 +34,7 @@ source=("$pkgname-$pkgver.tar.gz::https://github.com/tensorflow/tensorflow/archi
         fix_gpu_atomic_redef.patch::https://github.com/tensorflow/tensorflow/commit/c054f40f66fa625f51085a20c48554c61d05c5fd.patch
         fix_ldexp_float.patch::https://github.com/tensorflow/tensorflow/commit/655ce09f679a90ecd561538227c703b42d0fc5fa.patch
         fix_occupancy_block.patch
-        rocm-3.7.patch::https://github.com/tensorflow/tensorflow/pull/42689.patch)
+        new-rocm.patch)
 
 sha512sums=('e497ef4564f50abf9f918be4522cf702f4cf945cb1ebf83af1386ac4ddc7373b3ba70c7f803f8ca06faf2c6b5396e60b1e0e9b97bfbd667e733b08b6e6d70ef0'
             'df2e0373e2f63b8766f31933f7db57f6a7559b8f03af1db51644fba87731451a7cd3895529a3192e5394612fcb42f245b794b1c9ca3c05881ca03a547c8c9acc'
@@ -44,7 +44,7 @@ sha512sums=('e497ef4564f50abf9f918be4522cf702f4cf945cb1ebf83af1386ac4ddc7373b3ba
             '75972acf0ec53b28aa6c93de77a385acaf675c0d0ae93b6545f67414e9895cbd1074a5d65b211390846b736df271a567b49ec4c992883ad83c060f708bbe0d20'
             '42fc09bc15412f3b9a82f36485735faed0dcc2f47d72c5bfc451bc09a2aad472db59edb387455fb6594b1606de3a7789917e1fb31280c7044898097ec37db3d5'
             '88c04ed7a766193687d7079102332e3c63d6f0accbda777836abe5e03e9ebb83fd1aeaa9e4adca70310ce18bf3c6c3907f1f8a11c13e67e3ef79497b91bbf126'
-            'SKIP')
+            '080fd9d4e1228ceb04901a0caceb18b965ef199704196a9b7711fcada3a8cfc2f65c529c4c0e05960ab1e469d203727bf0bbded82d895c13e0e2ab29ae524317')
 
 get_pyver () {
   python -c 'import sys; print(str(sys.version_info[0]) + "." + str(sys.version_info[1]))'
@@ -89,8 +89,9 @@ prepare() {
   # https://github.com/tensorflow/tensorflow/commit/22def20bae7be6d5b790b360abed5919385b16c2
   patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_occupancy_block.patch
 
-  # Update codebase for ROCm 3.7
-  patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/rocm-3.7.patch
+  # Patch for ROCm 3.7 and later
+  # https://github.com/tensorflow/tensorflow/pull/42689
+  patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/new-rocm.patch
 
   cp -r tensorflow-${_pkgver} tensorflow-${_pkgver}-rocm
   cp -r tensorflow-${_pkgver} tensorflow-${_pkgver}-opt-rocm
diff --git a/new-rocm.patch b/new-rocm.patch
new file mode 100644
index 000000000000..01eb2b4fab8c
--- /dev/null
+++ b/new-rocm.patch
@@ -0,0 +1,692 @@
+From fcc2de09eb38f45b678a5457f594ca594f2572c9 Mon Sep 17 00:00:00 2001
+From: Deven Desai <deven.desai.amd@gmail.com>
+Date: Thu, 16 Jul 2020 19:38:03 +0000
+Subject: [PATCH 1/8] Change references to libhip_hcc.so to refer to
+ libamdhip64.so instead
+
+With the switch to the new hipclang-vdi runtime (in ROCm 3.5), the new name for the HIP runtime library is libamdhip64.so.
+
+For backwards compatibility, ROCm 3.5 and ROCm 3.6 include a "libhip_hcc.so" softlink, which points to libamdhip64.so. That softlink will be going away starting with ROCm 3.7(?).
+
+This commit updates references to libhip_hcc.so (in the TF build) to use libamdhip64.so instead.
+
+See following JIRA tickets for further details:
+
+* http://ontrack-internal.amd.com/browse/SWDEV-244762
+* http://ontrack-internal.amd.com/browse/SWDEV-238533
+---
+ tensorflow/stream_executor/platform/default/dso_loader.cc | 2 +-
+ .../crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl | 7 -------
+ third_party/gpus/rocm_configure.bzl                       | 8 +++-----
+ 3 files changed, 4 insertions(+), 13 deletions(-)
+
+diff --git a/tensorflow/stream_executor/platform/default/dso_loader.cc b/tensorflow/stream_executor/platform/default/dso_loader.cc
+index 70b1ebe070a76..84293b7767a20 100644
+--- a/tensorflow/stream_executor/platform/default/dso_loader.cc
++++ b/tensorflow/stream_executor/platform/default/dso_loader.cc
+@@ -140,7 +140,7 @@ port::StatusOr<void*> GetHipsparseDsoHandle() {
+   return GetDsoHandle("hipsparse", "");
+ }
+ 
+-port::StatusOr<void*> GetHipDsoHandle() { return GetDsoHandle("hip_hcc", ""); }
++port::StatusOr<void*> GetHipDsoHandle() { return GetDsoHandle("amdhip64", ""); }
+ 
+ }  // namespace DsoLoader
+ 
+diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
+index 8848bd32c2e1d..d5bfe78c6449d 100755
+--- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
++++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
+@@ -34,8 +34,6 @@ HIPCC_ENV = '%{hipcc_env}'
+ HIPCC_IS_HIPCLANG = '%{hipcc_is_hipclang}'=="True"
+ HIP_RUNTIME_PATH = '%{hip_runtime_path}'
+ HIP_RUNTIME_LIBRARY = '%{hip_runtime_library}'
+-HCC_RUNTIME_PATH = '%{hcc_runtime_path}'
+-HCC_RUNTIME_LIBRARY = '%{hcc_runtime_library}'
+ ROCR_RUNTIME_PATH = '%{rocr_runtime_path}'
+ ROCR_RUNTIME_LIBRARY = '%{rocr_runtime_library}'
+ VERBOSE = '%{crosstool_verbose}'=='1'
+@@ -267,11 +265,6 @@ def main():
+     gpu_linker_flags.append('-L' + ROCR_RUNTIME_PATH)
+     gpu_linker_flags.append('-Wl,-rpath=' + ROCR_RUNTIME_PATH)
+     gpu_linker_flags.append('-l' + ROCR_RUNTIME_LIBRARY)
+-    # do not link with HCC runtime library in case hip-clang toolchain is used
+-    if not HIPCC_IS_HIPCLANG:
+-      gpu_linker_flags.append('-L' + HCC_RUNTIME_PATH)
+-      gpu_linker_flags.append('-Wl,-rpath=' + HCC_RUNTIME_PATH)
+-      gpu_linker_flags.append('-l' + HCC_RUNTIME_LIBRARY)
+     gpu_linker_flags.append('-L' + HIP_RUNTIME_PATH)
+     gpu_linker_flags.append('-Wl,-rpath=' + HIP_RUNTIME_PATH)
+     gpu_linker_flags.append('-l' + HIP_RUNTIME_LIBRARY)
+diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl
+index 1312574f0aa46..0508279518894 100644
+--- a/third_party/gpus/rocm_configure.bzl
++++ b/third_party/gpus/rocm_configure.bzl
+@@ -390,7 +390,7 @@ def _find_libs(repository_ctx, rocm_config, bash_bin):
+     libs_paths = [
+         (name, _rocm_lib_paths(repository_ctx, name, path))
+         for name, path in [
+-            ("hip_hcc", rocm_config.rocm_toolkit_path + "/hip"),
++            ("amdhip64", rocm_config.rocm_toolkit_path + "/hip"),
+             ("rocblas", rocm_config.rocm_toolkit_path + "/rocblas"),
+             ("rocfft", rocm_config.rocm_toolkit_path + "/rocfft"),
+             ("hiprand", rocm_config.rocm_toolkit_path + "/hiprand"),
+@@ -646,7 +646,7 @@ def _create_local_rocm_repository(repository_ctx):
+         "rocm/BUILD",
+         tpl_paths["rocm:BUILD"],
+         {
+-            "%{hip_lib}": rocm_libs["hip_hcc"].file_name,
++            "%{hip_lib}": rocm_libs["amdhip64"].file_name,
+             "%{rocblas_lib}": rocm_libs["rocblas"].file_name,
+             "%{rocfft_lib}": rocm_libs["rocfft"].file_name,
+             "%{hiprand_lib}": rocm_libs["hiprand"].file_name,
+@@ -733,9 +733,7 @@ def _create_local_rocm_repository(repository_ctx):
+             "%{rocr_runtime_path}": rocm_config.rocm_toolkit_path + "/lib",
+             "%{rocr_runtime_library}": "hsa-runtime64",
+             "%{hip_runtime_path}": rocm_config.rocm_toolkit_path + "/hip/lib",
+-            "%{hip_runtime_library}": "hip_hcc",
+-            "%{hcc_runtime_path}": rocm_config.rocm_toolkit_path + "/hcc/lib",
+-            "%{hcc_runtime_library}": "mcwamp",
++            "%{hip_runtime_library}": "amdhip64",
+             "%{crosstool_verbose}": _crosstool_verbose(repository_ctx),
+             "%{gcc_host_compiler_path}": str(cc),
+         },
+
+From 77fb7fd1c68f81c416fd909b6677277b3637be05 Mon Sep 17 00:00:00 2001
+From: Deven Desai <deven.desai.amd@gmail.com>
+Date: Fri, 17 Jul 2020 01:04:58 +0000
+Subject: [PATCH 2/8] Removing references to `*StaticCompiledGEMM` from TF code
+
+This commit is in conjunction with this MIOpen PR which removes scgemm from MIOpen
+https://github.com/ROCmSoftwarePlatform/MIOpen/pull/325
+
+The MIOpen release that includes that change will be included in the next ROCm release.
+This commit removes references to `*StaticCompiledGEMM` from TF code to prepare for switching to the next ROCm release (3.7)
+---
+ tensorflow/stream_executor/rocm/rocm_dnn.cc | 6 ------
+ 1 file changed, 6 deletions(-)
+
+diff --git a/tensorflow/stream_executor/rocm/rocm_dnn.cc b/tensorflow/stream_executor/rocm/rocm_dnn.cc
+index 80306105d4adf..4c5a740dfb090 100644
+--- a/tensorflow/stream_executor/rocm/rocm_dnn.cc
++++ b/tensorflow/stream_executor/rocm/rocm_dnn.cc
+@@ -113,9 +113,6 @@ string ToString(miopenConvFwdAlgorithm_t algorithm) {
+     case miopenConvolutionFwdAlgoImplicitGEMM:
+       s = "Implicit GEMM";
+       break;
+-    case miopenConvolutionFwdAlgoStaticCompiledGEMM:
+-      s = "Static Compiled GEMM";
+-      break;
+   }
+   return s;
+ }
+@@ -182,9 +179,6 @@ string ToString(miopenConvAlgorithm_t algorithm) {
+     case miopenConvolutionAlgoImplicitGEMM:
+       s = "Implicit GEMM";
+       break;
+-    case miopenConvolutionAlgoStaticCompiledGEMM:
+-      s = "Static Compiled GEMM";
+-      break;
+   }
+   return s;
+ }
+
+From 566d2a95c6140322241bce20fcfea952e837fda1 Mon Sep 17 00:00:00 2001
+From: Deven Desai <deven.desai.amd@gmail.com>
+Date: Tue, 11 Aug 2020 02:09:46 +0000
+Subject: [PATCH 3/8] Reverting "Provide ldexp float overload for HIP, it's
+ missing in their headers. "
+
+---
+ tensorflow/core/kernels/cwise_ops_gpu_common.cu.h | 6 ------
+ tensorflow/core/kernels/rnn/blas_gemm.h           | 5 -----
+ 2 files changed, 11 deletions(-)
+
+diff --git a/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h b/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h
+index 8849c3f4eddbb..ecc58da315f6b 100644
+--- a/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h
++++ b/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h
+@@ -30,12 +30,6 @@ limitations under the License.
+ #include "tensorflow/core/platform/types.h"
+ 
+ #include "tensorflow/core/platform/logging.h"
+-
+-#ifdef __HIP_DEVICE_COMPILE__
+-// Provide ldexp float overload for HIP, it's missing in their headers.
+-__device__ inline float ldexp(float x, int exp) { return ldexpf(x, exp); }
+-#endif
+-
+ namespace tensorflow {
+ namespace functor {
+ 
+diff --git a/tensorflow/core/kernels/rnn/blas_gemm.h b/tensorflow/core/kernels/rnn/blas_gemm.h
+index 74f4cd2bb39a4..126e1edef17a9 100644
+--- a/tensorflow/core/kernels/rnn/blas_gemm.h
++++ b/tensorflow/core/kernels/rnn/blas_gemm.h
+@@ -25,11 +25,6 @@ limitations under the License.
+ #include "tensorflow/core/kernels/eigen_contraction_kernel.h"
+ #endif
+ 
+-#ifdef __HIP_DEVICE_COMPILE__
+-// Provide ldexp float overload for HIP, it's missing in their headers.
+-__device__ inline float ldexp(float x, int exp) { return ldexpf(x, exp); }
+-#endif
+-
+ namespace tensorflow {
+ class OpKernelContext;
+ namespace functor {
+
+From 9dcaad456e194bf8d1e3962cd6ad272f4879d7f3 Mon Sep 17 00:00:00 2001
+From: Deven Desai <deven.desai.amd@gmail.com>
+Date: Wed, 12 Aug 2020 00:39:02 +0000
+Subject: [PATCH 4/8] updating ROCM CI scripts to use ROCm 3.7
+
+---
+ .../tools/ci_build/linux/rocm/run_cc_core.sh  | 34 +++++++++++++------
+ .../ci_build/linux/rocm/run_csb_tests.sh      | 27 ++++++++++-----
+ .../tools/ci_build/linux/rocm/run_py3_core.sh | 23 +++++++++----
+ .../tools/ci_build/xla/linux/rocm/run_py3.sh  | 33 ++++++++++++------
+ 4 files changed, 79 insertions(+), 38 deletions(-)
+
+diff --git a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
+index 1f4a36f8de0f5..92d21cb133be9 100755
+--- a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
++++ b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
+@@ -18,20 +18,27 @@
+ set -e
+ set -x
+ 
+-N_JOBS=$(grep -c ^processor /proc/cpuinfo)
+-N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
++N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
++TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
++TF_TESTS_PER_GPU=1
++N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})
+ 
+ echo ""
+-echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
++echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
+ echo ""
+ 
++# First positional argument (if any) specifies the ROCM_INSTALL_DIR
++ROCM_INSTALL_DIR=/opt/rocm-3.7.0
++if [[ -n $1 ]]; then
++    ROCM_INSTALL_DIR=$1
++fi
++
+ # Run configure.
+ export PYTHON_BIN_PATH=`which python3`
+ export CC_OPT_FLAGS='-mavx'
+ 
+ export TF_NEED_ROCM=1
+-export ROCM_PATH=/opt/rocm-3.3.0
+-export TF_GPU_COUNT=${N_GPUS}
++export ROCM_PATH=$ROCM_INSTALL_DIR
+ 
+ yes "" | $PYTHON_BIN_PATH configure.py
+ 
+@@ -39,15 +46,17 @@ yes "" | $PYTHON_BIN_PATH configure.py
+ bazel test \
+       --config=rocm \
+       -k \
+-      --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
++      --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-multi_gpu,-v1only \
+       --test_lang_filters=cc \
+-      --jobs=${N_JOBS} \
+-      --local_test_jobs=${TF_GPU_COUNT}\
++      --jobs=${N_BUILD_JOBS} \
++      --local_test_jobs=${N_TEST_JOBS} \
++      --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
++      --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
+       --test_timeout 600,900,2400,7200 \
+       --build_tests_only \
+       --test_output=errors \
+       --test_sharding_strategy=disabled \
+-      --test_size_filters=small,medium \
++      --test_size_filters=small,medium,large \
+       --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
+       -- \
+       //tensorflow/... \
+@@ -59,11 +68,14 @@ bazel test \
+       --config=rocm \
+       -k \
+       --test_tag_filters=gpu \
+-      --jobs=${N_JOBS} \
+-      --local_test_jobs=1 \
++      --jobs=${N_BUILD_JOBS} \
++      --local_test_jobs=${N_TEST_JOBS} \
++      --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
++      --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
+       --test_timeout 600,900,2400,7200 \
+       --build_tests_only \
+       --test_output=errors \
+       --test_sharding_strategy=disabled \
++      --test_size_filters=small,medium,large \
+       -- \
+       //tensorflow/core/nccl:nccl_manager_test
+diff --git a/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh b/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh
+index 4962b2789b1c0..80c0686e64724 100755
+--- a/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh
++++ b/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh
+@@ -18,20 +18,27 @@
+ set -e
+ set -x
+ 
+-N_JOBS=$(grep -c ^processor /proc/cpuinfo)
+-N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
++N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
++TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
++TF_TESTS_PER_GPU=1
++N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})
+ 
+ echo ""
+-echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
++echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
+ echo ""
+ 
++# First positional argument (if any) specifies the ROCM_INSTALL_DIR
++ROCM_INSTALL_DIR=/opt/rocm-3.7.0
++if [[ -n $1 ]]; then
++    ROCM_INSTALL_DIR=$1
++fi
++
+ # Run configure.
+ export PYTHON_BIN_PATH=`which python3`
+ export CC_OPT_FLAGS='-mavx'
+ 
+ export TF_NEED_ROCM=1
+-export ROCM_PATH=/opt/rocm-3.3.0
+-export TF_GPU_COUNT=${N_GPUS}
++export ROCM_PATH=$ROCM_INSTALL_DIR
+ 
+ yes "" | $PYTHON_BIN_PATH configure.py
+ 
+@@ -40,8 +47,10 @@ bazel test \
+       --config=rocm \
+       -k \
+       --test_tag_filters=gpu,-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
+-      --jobs=${N_JOBS} \
+-      --local_test_jobs=${TF_GPU_COUNT} \
++      --jobs=${N_BUILD_JOBS} \
++      --local_test_jobs=${N_TEST_JOBS} \
++      --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
++      --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
+       --test_timeout 600,900,2400,7200 \
+       --test_output=errors \
+       --test_sharding_strategy=disabled \
+@@ -60,8 +69,8 @@ bazel test \
+       --test_tag_filters=gpu \
+       --test_timeout 600,900,2400,7200 \
+       --test_output=errors \
+-      --jobs=${N_JOBS} \
+-      --local_test_jobs=1 \
++      --jobs=${N_BUILD_JOBS} \
++      --local_test_jobs=${N_TEST_JOBS} \
+       --test_sharding_strategy=disabled \
+       -- \
+       //tensorflow/core/nccl:nccl_manager_test
+diff --git a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
+index 7ea866f8e2032..3a09081dd6ac6 100755
+--- a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
++++ b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
+@@ -18,20 +18,27 @@
+ set -e
+ set -x
+ 
+-N_JOBS=$(grep -c ^processor /proc/cpuinfo)
+-N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
++N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
++TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
++TF_TESTS_PER_GPU=1
++N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})
+ 
+ echo ""
+-echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
++echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
+ echo ""
+ 
++# First positional argument (if any) specifies the ROCM_INSTALL_DIR
++ROCM_INSTALL_DIR=/opt/rocm-3.7.0
++if [[ -n $1 ]]; then
++    ROCM_INSTALL_DIR=$1
++fi
++
+ # Run configure.
+ export PYTHON_BIN_PATH=`which python3`
+ export CC_OPT_FLAGS='-mavx'
+ 
+ export TF_NEED_ROCM=1
+-export ROCM_PATH=/opt/rocm-3.3.0
+-export TF_GPU_COUNT=${N_GPUS}
++export ROCM_PATH=$ROCM_INSTALL_DIR
+ 
+ yes "" | $PYTHON_BIN_PATH configure.py
+ 
+@@ -41,8 +48,10 @@ bazel test \
+       -k \
+       --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
+       --test_lang_filters=py \
+-      --jobs=${N_JOBS} \
+-      --local_test_jobs=${TF_GPU_COUNT} \
++      --jobs=${N_BUILD_JOBS} \
++      --local_test_jobs=${N_TEST_JOBS} \
++      --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
++      --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
+       --test_timeout 600,900,2400,7200 \
+       --build_tests_only \
+       --test_output=errors \
+diff --git a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
+index 6ce1fad9cc754..d623b77d5333d 100755
+--- a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
++++ b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
+@@ -18,20 +18,27 @@
+ set -e
+ set -x
+ 
+-N_JOBS=$(grep -c ^processor /proc/cpuinfo)
+-N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
++N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
++TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
++TF_TESTS_PER_GPU=1
++N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})
+ 
+ echo ""
+-echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
++echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
+ echo ""
+ 
++# First positional argument (if any) specifies the ROCM_INSTALL_DIR
++ROCM_INSTALL_DIR=/opt/rocm-3.7.0
++if [[ -n $1 ]]; then
++    ROCM_INSTALL_DIR=$1
++fi
++
+ # Run configure.
+ export PYTHON_BIN_PATH=`which python3`
+ export CC_OPT_FLAGS='-mavx'
+ 
+ export TF_NEED_ROCM=1
+-export ROCM_PATH=/opt/rocm-3.3.0
+-export TF_GPU_COUNT=${N_GPUS}
++export ROCM_PATH=$ROCM_INSTALL_DIR
+ 
+ yes "" | $PYTHON_BIN_PATH configure.py
+ echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc
+@@ -41,9 +48,11 @@ bazel test \
+       --config=rocm \
+       --config=xla \
+       -k \
+-      --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
+-      --jobs=${N_JOBS} \
+-      --local_test_jobs=${TF_GPU_COUNT} \
++      --test_tag_filters=-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
++      --jobs=${N_BUILD_JOBS} \
++      --local_test_jobs=${N_TEST_JOBS} \
++      --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
++      --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
+       --test_timeout 600,900,2400,7200 \
+       --build_tests_only \
+       --test_output=errors \
+@@ -65,9 +74,11 @@ bazel test \
+       --config=rocm \
+       --config=xla \
+       -k \
+-      --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
+-      --jobs=${N_JOBS} \
+-      --local_test_jobs=${TF_GPU_COUNT} \
++      --test_tag_filters=-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
++      --jobs=${N_BUILD_JOBS} \
++      --local_test_jobs=${N_TEST_JOBS} \
++      --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
++      --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
+       --test_timeout 600,900,2400,7200 \
+       --build_tests_only \
+       --test_output=errors \
+
+From 4b76a49a1a5741dece6d368b30f7125e20c12878 Mon Sep 17 00:00:00 2001
+From: Deven Desai <deven.desai.amd@gmail.com>
+Date: Wed, 26 Aug 2020 15:21:31 +0000
+Subject: [PATCH 5/8] Updating Dockerfile.rocm to use ROCm 3.7
+
+---
+ tensorflow/tools/ci_build/Dockerfile.rocm | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm
+index 4f5d3ae7291b1..d209173258ada 100644
+--- a/tensorflow/tools/ci_build/Dockerfile.rocm
++++ b/tensorflow/tools/ci_build/Dockerfile.rocm
+@@ -3,8 +3,10 @@
+ FROM ubuntu:bionic
+ MAINTAINER Jeff Poznanovic <jeffrey.poznanovic@amd.com>
+ 
+-ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/3.3/
+-ARG ROCM_PATH=/opt/rocm-3.3.0
++ARG ROCM_DEB_REPO=http://repo.radeon.com/rocm/apt/3.7/
++ARG ROCM_BUILD_NAME=xenial
++ARG ROCM_BUILD_NUM=main
++ARG ROCM_PATH=/opt/rocm-3.7.0
+ 
+ ENV DEBIAN_FRONTEND noninteractive
+ ENV TF_NEED_ROCM 1
+@@ -13,8 +15,12 @@ RUN apt update && apt install -y wget software-properties-common
+ 
+ # Add rocm repository
+ RUN apt-get clean all
+-RUN wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add -
+-RUN sh -c  "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources.list.d/rocm.list"
++RUN bin/bash -c 'if [[ $ROCM_DEB_REPO == http://repo.radeon.com/rocm/*  ]] ; then \
++      wget -qO - $ROCM_DEB_REPO/rocm.gpg.key | apt-key add -; \
++      echo "deb [arch=amd64] $ROCM_DEB_REPO $ROCM_BUILD_NAME $ROCM_BUILD_NUM" > /etc/apt/sources.list.d/rocm.list; \
++    else \
++      echo "deb [arch=amd64 trusted=yes] $ROCM_DEB_REPO $ROCM_BUILD_NAME $ROCM_BUILD_NUM" > /etc/apt/sources.list.d/rocm.list ; \
++    fi'
+ 
+ # Install misc pkgs
+ RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+
+From f5a822d2012bc3e1cea1de97ff8189404688f84e Mon Sep 17 00:00:00 2001
+From: Deven Desai <deven.desai.amd@gmail.com>
+Date: Wed, 12 Aug 2020 15:51:34 +0000
+Subject: [PATCH 6/8] Updating TF to acccount for the (ROCm 3.7) change in
+ hipDeviceGetStreamPriorityRange
+
+Starting with ROCm 3.7, the `hipDeviceGetStreamPriorityRange` API returns a range of `[-1,1]`.
+This is a departure from the `[0,2]` range that was returned by this API in ROCm 3.3 and prior.
+
+Updating the TF unit test, that has checks based on the range returned by this API, to account for change in the returned range
+---
+ .../common_runtime/gpu/gpu_device_test.cc     | 34 +++++--------------
+ 1 file changed, 8 insertions(+), 26 deletions(-)
+
+diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
+index 6448fc56af7a1..21c75244b5feb 100644
+--- a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
++++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
+@@ -230,9 +230,9 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndNoPriority) {
+ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
+   {
+ #if TENSORFLOW_USE_ROCM
+-    // Priority outside the range (0, 2) for AMD GPUs
++    // Priority outside the range (-1, 1) for AMD GPUs
+     SessionOptions opts =
+-        MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 2}});
++        MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-2, 1}});
+ #else
+     // Priority outside the range (-2, 0) for NVidia GPUs
+     SessionOptions opts =
+@@ -245,7 +245,7 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
+ #if TENSORFLOW_USE_ROCM
+     ExpectErrorMessageSubstr(
+         status,
+-        "Priority -1 is outside the range of supported priorities [0,2] for"
++        "Priority -2 is outside the range of supported priorities [-1,1] for"
+         " virtual device 0 on GPU# 0");
+ #else
+     ExpectErrorMessageSubstr(
+@@ -254,8 +254,8 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
+   }
+   {
+ #if TENSORFLOW_USE_ROCM
+-    // Priority outside the range (0, 2) for AMD GPUs
+-    SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 3}});
++    // Priority outside the range (-1, 1) for AMD GPUs
++    SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 2}});
+ #else
+     // Priority outside the range (-2, 0) for NVidia GPUs
+     SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}});
+@@ -267,7 +267,7 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
+ #if TENSORFLOW_USE_ROCM
+     ExpectErrorMessageSubstr(
+         status,
+-        "Priority 3 is outside the range of supported priorities [0,2] for"
++        "Priority 2 is outside the range of supported priorities [-1,1] for"
+         " virtual device 0 on GPU# 0");
+ #else
+     ExpectErrorMessageSubstr(
+@@ -288,26 +288,17 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndPriority) {
+ }
+ 
+ TEST_F(GPUDeviceTest, MultipleVirtualDevices) {
+-#if TENSORFLOW_USE_ROCM
+-  // Valid range for priority values on AMD GPUs in (0,2)
+-  SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}});
+-#else
++  // Valid range for priority values on AMD GPUs in (-1,1)
+   // Valid range for priority values on NVidia GPUs in (-2, 0)
+   SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, -1}});
+-#endif
+   std::vector<std::unique_ptr<Device>> devices;
+   TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
+       opts, kDeviceNamePrefix, &devices));
+   EXPECT_EQ(2, devices.size());
+   EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
+   EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
+-#if TENSORFLOW_USE_ROCM
+-  EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
+-  EXPECT_EQ(1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
+-#else
+   EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
+   EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
+-#endif
+   ASSERT_EQ(1, devices[0]->attributes().locality().links().link_size());
+   ASSERT_EQ(1, devices[1]->attributes().locality().links().link_size());
+   EXPECT_EQ(1, devices[0]->attributes().locality().links().link(0).device_id());
+@@ -339,27 +330,18 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevicesWithPriority) {
+   }
+   {
+     // Multile virtual devices with matching priority.
+-#if TENSORFLOW_USE_ROCM
+-    // Valid range for priority values on AMD GPUs in (0,2)
+-    SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{2, 1}});
+-#else
++    // Valid range for priority values on AMD GPUs in (-1,1)
+     // Valid range for priority values on NVidia GPUs in (-2, 0)
+     SessionOptions opts =
+         MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 0}});
+-#endif
+     std::vector<std::unique_ptr<Device>> devices;
+     TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
+         opts, kDeviceNamePrefix, &devices));
+     EXPECT_EQ(2, devices.size());
+     EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
+     EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
+-#if TENSORFLOW_USE_ROCM
+-    EXPECT_EQ(2, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
+-    EXPECT_EQ(1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
+-#else
+     EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
+     EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
+-#endif
+   }
+ }
+ 
+
+From ae9e3bd2fb8c3e042742b8c534c9020732c2c66d Mon Sep 17 00:00:00 2001
+From: Deven Desai <deven.desai.amd@gmail.com>
+Date: Wed, 12 Aug 2020 23:05:32 +0000
+Subject: [PATCH 7/8] Commeting out subtests that are failing due to JIRA
+ ticket 236756, and also removing the no_rocm tag from the tests that contain
+ those subtests
+
+---
+ tensorflow/python/ops/parallel_for/math_test.py      | 5 +++++
+ tensorflow/python/ops/ragged/ragged_dispatch_test.py | 5 +++++
+ 2 files changed, 10 insertions(+)
+
+diff --git a/tensorflow/python/ops/parallel_for/math_test.py b/tensorflow/python/ops/parallel_for/math_test.py
+index 933ce765cdbfa..367f40d341115 100644
+--- a/tensorflow/python/ops/parallel_for/math_test.py
++++ b/tensorflow/python/ops/parallel_for/math_test.py
+@@ -82,6 +82,11 @@ def test_unary_cwise_complex_ops(self):
+     self._test_unary_cwise_ops(complex_ops, True)
+ 
+   def test_unary_cwise_real_ops_1(self):
++    if test.is_built_with_rocm():
++      # TODO(rocm):
++      # This fails on ROCm...see JIRA ticket 236756
++      self.skipTest('Fails on ROCM')
++
+     real_ops = [
+         lambda x: math_ops.acosh(1 + math_ops.square(x)),
+         math_ops.abs,
+diff --git a/tensorflow/python/ops/ragged/ragged_dispatch_test.py b/tensorflow/python/ops/ragged/ragged_dispatch_test.py
+index 0237624aa451d..7a1d7c1882af1 100644
+--- a/tensorflow/python/ops/ragged/ragged_dispatch_test.py
++++ b/tensorflow/python/ops/ragged/ragged_dispatch_test.py
+@@ -139,6 +139,11 @@ def assertSameShape(self, x, y):
+       ]
+       )  # pyformat: disable
+   def testUnaryElementwiseOp(self, x, op=math_ops.abs, **extra_args):
++    if test_util.IsBuiltWithROCm():
++      # TODO(rocm):
++      # This fails on ROCm...see JIRA ticket 236756
++      self.skipTest('Fails on ROCM')
++
+     result = op(x, **extra_args)
+ 
+     # Run the wrapped op on the dense values, for comparison.
+
+From d4b8e68a3675bfb2d7465205420bd5ad15701d0b Mon Sep 17 00:00:00 2001
+From: Deven Desai <deven.desai.amd@gmail.com>
+Date: Wed, 26 Aug 2020 22:01:18 +0000
+Subject: [PATCH 8/8] Adding no_rocm tag to unit-tests that will not pass with
+ ROCm 3.7 until PR #42288 gets merged
+
+---
+ tensorflow/python/BUILD                    | 1 +
+ tensorflow/python/keras/optimizer_v2/BUILD | 2 ++
+ 2 files changed, 3 insertions(+)
+
+diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
+index a111237e0565d..5252ebbed6e4b 100644
+--- a/tensorflow/python/BUILD
++++ b/tensorflow/python/BUILD
+@@ -5423,6 +5423,7 @@ cuda_py_test(
+     python_version = "PY3",
+     shard_count = 10,
+     tags = [
++        "no_rocm",
+         "no_windows_gpu",
+         "noasan",  # b/159332048
+         "nomsan",  # b/148630708
+diff --git a/tensorflow/python/keras/optimizer_v2/BUILD b/tensorflow/python/keras/optimizer_v2/BUILD
+index b208e2e1e1e6b..11966ce8211d2 100644
+--- a/tensorflow/python/keras/optimizer_v2/BUILD
++++ b/tensorflow/python/keras/optimizer_v2/BUILD
+@@ -157,6 +157,7 @@ cuda_py_test(
+     size = "medium",
+     srcs = ["adadelta_test.py"],
+     shard_count = 4,
++    tags = ["no_rocm"],
+     deps = [
+         ":optimizer_v2",
+         "//tensorflow/python:client_testlib",
+@@ -298,6 +299,7 @@ cuda_py_test(
+     size = "medium",
+     srcs = ["rmsprop_test.py"],
+     shard_count = 2,
++    tags = ["no_rocm"],
+     deps = [
+         ":optimizer_v2",
+         "//tensorflow/python:array_ops",
author	acxz	2020-09-30 10:09:59 -0400
committer	acxz	2020-09-30 10:10:56 -0400
commit	ad6bd8ba4d83cf71c450fa15b9c68559749f4739 (patch)
tree	b2c24ce0af30baf0464d46c86defc8c9cd030b88
parent	1c2a2f45cdd06ea6442d24b14824bf1fec5aa9a8 (diff)
download	aur-ad6bd8ba4d83cf71c450fa15b9c68559749f4739.tar.gz