diff options
Diffstat (limited to 'new-rocm.patch')
-rw-r--r-- | new-rocm.patch | 692 |
1 files changed, 0 insertions, 692 deletions
diff --git a/new-rocm.patch b/new-rocm.patch deleted file mode 100644 index 01eb2b4fab8c..000000000000 --- a/new-rocm.patch +++ /dev/null @@ -1,692 +0,0 @@ -From fcc2de09eb38f45b678a5457f594ca594f2572c9 Mon Sep 17 00:00:00 2001 -From: Deven Desai <deven.desai.amd@gmail.com> -Date: Thu, 16 Jul 2020 19:38:03 +0000 -Subject: [PATCH 1/8] Change references to libhip_hcc.so to refer to - libamdhip64.so instead - -With the switch to the new hipclang-vdi runtime (in ROCm 3.5), the new name for the HIP runtime library is libamdhip64.so. - -For backwards compatibility, ROCm 3.5 and ROCm 3.6 include a "libhip_hcc.so" softlink, which points to libamdhip64.so. That softlink will be going away starting with ROCm 3.7(?). - -This commit updates references to libhip_hcc.so (in the TF build) to use libamdhip64.so instead. - -See following JIRA tickets for further details: - -* http://ontrack-internal.amd.com/browse/SWDEV-244762 -* http://ontrack-internal.amd.com/browse/SWDEV-238533 ---- - tensorflow/stream_executor/platform/default/dso_loader.cc | 2 +- - .../crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl | 7 ------- - third_party/gpus/rocm_configure.bzl | 8 +++----- - 3 files changed, 4 insertions(+), 13 deletions(-) - -diff --git a/tensorflow/stream_executor/platform/default/dso_loader.cc b/tensorflow/stream_executor/platform/default/dso_loader.cc -index 70b1ebe070a76..84293b7767a20 100644 ---- a/tensorflow/stream_executor/platform/default/dso_loader.cc -+++ b/tensorflow/stream_executor/platform/default/dso_loader.cc -@@ -140,7 +140,7 @@ port::StatusOr<void*> GetHipsparseDsoHandle() { - return GetDsoHandle("hipsparse", ""); - } - --port::StatusOr<void*> GetHipDsoHandle() { return GetDsoHandle("hip_hcc", ""); } -+port::StatusOr<void*> GetHipDsoHandle() { return GetDsoHandle("amdhip64", ""); } - - } // namespace DsoLoader - -diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl -index 8848bd32c2e1d..d5bfe78c6449d 100755 ---- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl -+++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl -@@ -34,8 +34,6 @@ HIPCC_ENV = '%{hipcc_env}' - HIPCC_IS_HIPCLANG = '%{hipcc_is_hipclang}'=="True" - HIP_RUNTIME_PATH = '%{hip_runtime_path}' - HIP_RUNTIME_LIBRARY = '%{hip_runtime_library}' --HCC_RUNTIME_PATH = '%{hcc_runtime_path}' --HCC_RUNTIME_LIBRARY = '%{hcc_runtime_library}' - ROCR_RUNTIME_PATH = '%{rocr_runtime_path}' - ROCR_RUNTIME_LIBRARY = '%{rocr_runtime_library}' - VERBOSE = '%{crosstool_verbose}'=='1' -@@ -267,11 +265,6 @@ def main(): - gpu_linker_flags.append('-L' + ROCR_RUNTIME_PATH) - gpu_linker_flags.append('-Wl,-rpath=' + ROCR_RUNTIME_PATH) - gpu_linker_flags.append('-l' + ROCR_RUNTIME_LIBRARY) -- # do not link with HCC runtime library in case hip-clang toolchain is used -- if not HIPCC_IS_HIPCLANG: -- gpu_linker_flags.append('-L' + HCC_RUNTIME_PATH) -- gpu_linker_flags.append('-Wl,-rpath=' + HCC_RUNTIME_PATH) -- gpu_linker_flags.append('-l' + HCC_RUNTIME_LIBRARY) - gpu_linker_flags.append('-L' + HIP_RUNTIME_PATH) - gpu_linker_flags.append('-Wl,-rpath=' + HIP_RUNTIME_PATH) - gpu_linker_flags.append('-l' + HIP_RUNTIME_LIBRARY) -diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl -index 1312574f0aa46..0508279518894 100644 ---- a/third_party/gpus/rocm_configure.bzl -+++ b/third_party/gpus/rocm_configure.bzl -@@ -390,7 +390,7 @@ def _find_libs(repository_ctx, rocm_config, bash_bin): - libs_paths = [ - (name, _rocm_lib_paths(repository_ctx, name, path)) - for name, path in [ -- ("hip_hcc", rocm_config.rocm_toolkit_path + "/hip"), -+ ("amdhip64", rocm_config.rocm_toolkit_path + "/hip"), - ("rocblas", rocm_config.rocm_toolkit_path + "/rocblas"), - ("rocfft", rocm_config.rocm_toolkit_path + "/rocfft"), - ("hiprand", rocm_config.rocm_toolkit_path + "/hiprand"), -@@ -646,7 +646,7 @@ def _create_local_rocm_repository(repository_ctx): - "rocm/BUILD", - tpl_paths["rocm:BUILD"], - { -- "%{hip_lib}": rocm_libs["hip_hcc"].file_name, -+ "%{hip_lib}": rocm_libs["amdhip64"].file_name, - "%{rocblas_lib}": rocm_libs["rocblas"].file_name, - "%{rocfft_lib}": rocm_libs["rocfft"].file_name, - "%{hiprand_lib}": rocm_libs["hiprand"].file_name, -@@ -733,9 +733,7 @@ def _create_local_rocm_repository(repository_ctx): - "%{rocr_runtime_path}": rocm_config.rocm_toolkit_path + "/lib", - "%{rocr_runtime_library}": "hsa-runtime64", - "%{hip_runtime_path}": rocm_config.rocm_toolkit_path + "/hip/lib", -- "%{hip_runtime_library}": "hip_hcc", -- "%{hcc_runtime_path}": rocm_config.rocm_toolkit_path + "/hcc/lib", -- "%{hcc_runtime_library}": "mcwamp", -+ "%{hip_runtime_library}": "amdhip64", - "%{crosstool_verbose}": _crosstool_verbose(repository_ctx), - "%{gcc_host_compiler_path}": str(cc), - }, - -From 77fb7fd1c68f81c416fd909b6677277b3637be05 Mon Sep 17 00:00:00 2001 -From: Deven Desai <deven.desai.amd@gmail.com> -Date: Fri, 17 Jul 2020 01:04:58 +0000 -Subject: [PATCH 2/8] Removing references to `*StaticCompiledGEMM` from TF code - -This commit is in conjunction with this MIOpen PR which removes scgemm from MIOpen -https://github.com/ROCmSoftwarePlatform/MIOpen/pull/325 - -The MIOpen release that includes that change will be included in the next ROCm release. -This commit removes references to `*StaticCompiledGEMM` from TF code to prepare for switching to the next ROCm release (3.7) ---- - tensorflow/stream_executor/rocm/rocm_dnn.cc | 6 ------ - 1 file changed, 6 deletions(-) - -diff --git a/tensorflow/stream_executor/rocm/rocm_dnn.cc b/tensorflow/stream_executor/rocm/rocm_dnn.cc -index 80306105d4adf..4c5a740dfb090 100644 ---- a/tensorflow/stream_executor/rocm/rocm_dnn.cc -+++ b/tensorflow/stream_executor/rocm/rocm_dnn.cc -@@ -113,9 +113,6 @@ string ToString(miopenConvFwdAlgorithm_t algorithm) { - case miopenConvolutionFwdAlgoImplicitGEMM: - s = "Implicit GEMM"; - break; -- case miopenConvolutionFwdAlgoStaticCompiledGEMM: -- s = "Static Compiled GEMM"; -- break; - } - return s; - } -@@ -182,9 +179,6 @@ string ToString(miopenConvAlgorithm_t algorithm) { - case miopenConvolutionAlgoImplicitGEMM: - s = "Implicit GEMM"; - break; -- case miopenConvolutionAlgoStaticCompiledGEMM: -- s = "Static Compiled GEMM"; -- break; - } - return s; - } - -From 566d2a95c6140322241bce20fcfea952e837fda1 Mon Sep 17 00:00:00 2001 -From: Deven Desai <deven.desai.amd@gmail.com> -Date: Tue, 11 Aug 2020 02:09:46 +0000 -Subject: [PATCH 3/8] Reverting "Provide ldexp float overload for HIP, it's - missing in their headers. " - ---- - tensorflow/core/kernels/cwise_ops_gpu_common.cu.h | 6 ------ - tensorflow/core/kernels/rnn/blas_gemm.h | 5 ----- - 2 files changed, 11 deletions(-) - -diff --git a/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h b/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h -index 8849c3f4eddbb..ecc58da315f6b 100644 ---- a/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h -+++ b/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h -@@ -30,12 +30,6 @@ limitations under the License. - #include "tensorflow/core/platform/types.h" - - #include "tensorflow/core/platform/logging.h" -- --#ifdef __HIP_DEVICE_COMPILE__ --// Provide ldexp float overload for HIP, it's missing in their headers. --__device__ inline float ldexp(float x, int exp) { return ldexpf(x, exp); } --#endif -- - namespace tensorflow { - namespace functor { - -diff --git a/tensorflow/core/kernels/rnn/blas_gemm.h b/tensorflow/core/kernels/rnn/blas_gemm.h -index 74f4cd2bb39a4..126e1edef17a9 100644 ---- a/tensorflow/core/kernels/rnn/blas_gemm.h -+++ b/tensorflow/core/kernels/rnn/blas_gemm.h -@@ -25,11 +25,6 @@ limitations under the License. - #include "tensorflow/core/kernels/eigen_contraction_kernel.h" - #endif - --#ifdef __HIP_DEVICE_COMPILE__ --// Provide ldexp float overload for HIP, it's missing in their headers. --__device__ inline float ldexp(float x, int exp) { return ldexpf(x, exp); } --#endif -- - namespace tensorflow { - class OpKernelContext; - namespace functor { - -From 9dcaad456e194bf8d1e3962cd6ad272f4879d7f3 Mon Sep 17 00:00:00 2001 -From: Deven Desai <deven.desai.amd@gmail.com> -Date: Wed, 12 Aug 2020 00:39:02 +0000 -Subject: [PATCH 4/8] updating ROCM CI scripts to use ROCm 3.7 - ---- - .../tools/ci_build/linux/rocm/run_cc_core.sh | 34 +++++++++++++------ - .../ci_build/linux/rocm/run_csb_tests.sh | 27 ++++++++++----- - .../tools/ci_build/linux/rocm/run_py3_core.sh | 23 +++++++++---- - .../tools/ci_build/xla/linux/rocm/run_py3.sh | 33 ++++++++++++------ - 4 files changed, 79 insertions(+), 38 deletions(-) - -diff --git a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh -index 1f4a36f8de0f5..92d21cb133be9 100755 ---- a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh -+++ b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh -@@ -18,20 +18,27 @@ - set -e - set -x - --N_JOBS=$(grep -c ^processor /proc/cpuinfo) --N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l) -+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo) -+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l) -+TF_TESTS_PER_GPU=1 -+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU}) - - echo "" --echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)." -+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)." - echo "" - -+# First positional argument (if any) specifies the ROCM_INSTALL_DIR -+ROCM_INSTALL_DIR=/opt/rocm-3.7.0 -+if [[ -n $1 ]]; then -+ ROCM_INSTALL_DIR=$1 -+fi -+ - # Run configure. - export PYTHON_BIN_PATH=`which python3` - export CC_OPT_FLAGS='-mavx' - - export TF_NEED_ROCM=1 --export ROCM_PATH=/opt/rocm-3.3.0 --export TF_GPU_COUNT=${N_GPUS} -+export ROCM_PATH=$ROCM_INSTALL_DIR - - yes "" | $PYTHON_BIN_PATH configure.py - -@@ -39,15 +46,17 @@ yes "" | $PYTHON_BIN_PATH configure.py - bazel test \ - --config=rocm \ - -k \ -- --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \ -+ --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-multi_gpu,-v1only \ - --test_lang_filters=cc \ -- --jobs=${N_JOBS} \ -- --local_test_jobs=${TF_GPU_COUNT}\ -+ --jobs=${N_BUILD_JOBS} \ -+ --local_test_jobs=${N_TEST_JOBS} \ -+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \ -+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \ - --test_timeout 600,900,2400,7200 \ - --build_tests_only \ - --test_output=errors \ - --test_sharding_strategy=disabled \ -- --test_size_filters=small,medium \ -+ --test_size_filters=small,medium,large \ - --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \ - -- \ - //tensorflow/... \ -@@ -59,11 +68,14 @@ bazel test \ - --config=rocm \ - -k \ - --test_tag_filters=gpu \ -- --jobs=${N_JOBS} \ -- --local_test_jobs=1 \ -+ --jobs=${N_BUILD_JOBS} \ -+ --local_test_jobs=${N_TEST_JOBS} \ -+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \ -+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \ - --test_timeout 600,900,2400,7200 \ - --build_tests_only \ - --test_output=errors \ - --test_sharding_strategy=disabled \ -+ --test_size_filters=small,medium,large \ - -- \ - //tensorflow/core/nccl:nccl_manager_test -diff --git a/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh b/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh -index 4962b2789b1c0..80c0686e64724 100755 ---- a/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh -+++ b/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh -@@ -18,20 +18,27 @@ - set -e - set -x - --N_JOBS=$(grep -c ^processor /proc/cpuinfo) --N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l) -+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo) -+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l) -+TF_TESTS_PER_GPU=1 -+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU}) - - echo "" --echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)." -+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)." - echo "" - -+# First positional argument (if any) specifies the ROCM_INSTALL_DIR -+ROCM_INSTALL_DIR=/opt/rocm-3.7.0 -+if [[ -n $1 ]]; then -+ ROCM_INSTALL_DIR=$1 -+fi -+ - # Run configure. - export PYTHON_BIN_PATH=`which python3` - export CC_OPT_FLAGS='-mavx' - - export TF_NEED_ROCM=1 --export ROCM_PATH=/opt/rocm-3.3.0 --export TF_GPU_COUNT=${N_GPUS} -+export ROCM_PATH=$ROCM_INSTALL_DIR - - yes "" | $PYTHON_BIN_PATH configure.py - -@@ -40,8 +47,10 @@ bazel test \ - --config=rocm \ - -k \ - --test_tag_filters=gpu,-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \ -- --jobs=${N_JOBS} \ -- --local_test_jobs=${TF_GPU_COUNT} \ -+ --jobs=${N_BUILD_JOBS} \ -+ --local_test_jobs=${N_TEST_JOBS} \ -+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \ -+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \ - --test_timeout 600,900,2400,7200 \ - --test_output=errors \ - --test_sharding_strategy=disabled \ -@@ -60,8 +69,8 @@ bazel test \ - --test_tag_filters=gpu \ - --test_timeout 600,900,2400,7200 \ - --test_output=errors \ -- --jobs=${N_JOBS} \ -- --local_test_jobs=1 \ -+ --jobs=${N_BUILD_JOBS} \ -+ --local_test_jobs=${N_TEST_JOBS} \ - --test_sharding_strategy=disabled \ - -- \ - //tensorflow/core/nccl:nccl_manager_test -diff --git a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh -index 7ea866f8e2032..3a09081dd6ac6 100755 ---- a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh -+++ b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh -@@ -18,20 +18,27 @@ - set -e - set -x - --N_JOBS=$(grep -c ^processor /proc/cpuinfo) --N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l) -+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo) -+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l) -+TF_TESTS_PER_GPU=1 -+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU}) - - echo "" --echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)." -+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)." - echo "" - -+# First positional argument (if any) specifies the ROCM_INSTALL_DIR -+ROCM_INSTALL_DIR=/opt/rocm-3.7.0 -+if [[ -n $1 ]]; then -+ ROCM_INSTALL_DIR=$1 -+fi -+ - # Run configure. - export PYTHON_BIN_PATH=`which python3` - export CC_OPT_FLAGS='-mavx' - - export TF_NEED_ROCM=1 --export ROCM_PATH=/opt/rocm-3.3.0 --export TF_GPU_COUNT=${N_GPUS} -+export ROCM_PATH=$ROCM_INSTALL_DIR - - yes "" | $PYTHON_BIN_PATH configure.py - -@@ -41,8 +48,10 @@ bazel test \ - -k \ - --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \ - --test_lang_filters=py \ -- --jobs=${N_JOBS} \ -- --local_test_jobs=${TF_GPU_COUNT} \ -+ --jobs=${N_BUILD_JOBS} \ -+ --local_test_jobs=${N_TEST_JOBS} \ -+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \ -+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \ - --test_timeout 600,900,2400,7200 \ - --build_tests_only \ - --test_output=errors \ -diff --git a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh -index 6ce1fad9cc754..d623b77d5333d 100755 ---- a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh -+++ b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh -@@ -18,20 +18,27 @@ - set -e - set -x - --N_JOBS=$(grep -c ^processor /proc/cpuinfo) --N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l) -+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo) -+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l) -+TF_TESTS_PER_GPU=1 -+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU}) - - echo "" --echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)." -+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)." - echo "" - -+# First positional argument (if any) specifies the ROCM_INSTALL_DIR -+ROCM_INSTALL_DIR=/opt/rocm-3.7.0 -+if [[ -n $1 ]]; then -+ ROCM_INSTALL_DIR=$1 -+fi -+ - # Run configure. - export PYTHON_BIN_PATH=`which python3` - export CC_OPT_FLAGS='-mavx' - - export TF_NEED_ROCM=1 --export ROCM_PATH=/opt/rocm-3.3.0 --export TF_GPU_COUNT=${N_GPUS} -+export ROCM_PATH=$ROCM_INSTALL_DIR - - yes "" | $PYTHON_BIN_PATH configure.py - echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc -@@ -41,9 +48,11 @@ bazel test \ - --config=rocm \ - --config=xla \ - -k \ -- --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \ -- --jobs=${N_JOBS} \ -- --local_test_jobs=${TF_GPU_COUNT} \ -+ --test_tag_filters=-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \ -+ --jobs=${N_BUILD_JOBS} \ -+ --local_test_jobs=${N_TEST_JOBS} \ -+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \ -+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \ - --test_timeout 600,900,2400,7200 \ - --build_tests_only \ - --test_output=errors \ -@@ -65,9 +74,11 @@ bazel test \ - --config=rocm \ - --config=xla \ - -k \ -- --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \ -- --jobs=${N_JOBS} \ -- --local_test_jobs=${TF_GPU_COUNT} \ -+ --test_tag_filters=-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \ -+ --jobs=${N_BUILD_JOBS} \ -+ --local_test_jobs=${N_TEST_JOBS} \ -+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \ -+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \ - --test_timeout 600,900,2400,7200 \ - --build_tests_only \ - --test_output=errors \ - -From 4b76a49a1a5741dece6d368b30f7125e20c12878 Mon Sep 17 00:00:00 2001 -From: Deven Desai <deven.desai.amd@gmail.com> -Date: Wed, 26 Aug 2020 15:21:31 +0000 -Subject: [PATCH 5/8] Updating Dockerfile.rocm to use ROCm 3.7 - ---- - tensorflow/tools/ci_build/Dockerfile.rocm | 14 ++++++++++---- - 1 file changed, 10 insertions(+), 4 deletions(-) - -diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm -index 4f5d3ae7291b1..d209173258ada 100644 ---- a/tensorflow/tools/ci_build/Dockerfile.rocm -+++ b/tensorflow/tools/ci_build/Dockerfile.rocm -@@ -3,8 +3,10 @@ - FROM ubuntu:bionic - MAINTAINER Jeff Poznanovic <jeffrey.poznanovic@amd.com> - --ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/3.3/ --ARG ROCM_PATH=/opt/rocm-3.3.0 -+ARG ROCM_DEB_REPO=http://repo.radeon.com/rocm/apt/3.7/ -+ARG ROCM_BUILD_NAME=xenial -+ARG ROCM_BUILD_NUM=main -+ARG ROCM_PATH=/opt/rocm-3.7.0 - - ENV DEBIAN_FRONTEND noninteractive - ENV TF_NEED_ROCM 1 -@@ -13,8 +15,12 @@ RUN apt update && apt install -y wget software-properties-common - - # Add rocm repository - RUN apt-get clean all --RUN wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add - --RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources.list.d/rocm.list" -+RUN bin/bash -c 'if [[ $ROCM_DEB_REPO == http://repo.radeon.com/rocm/* ]] ; then \ -+ wget -qO - $ROCM_DEB_REPO/rocm.gpg.key | apt-key add -; \ -+ echo "deb [arch=amd64] $ROCM_DEB_REPO $ROCM_BUILD_NAME $ROCM_BUILD_NUM" > /etc/apt/sources.list.d/rocm.list; \ -+ else \ -+ echo "deb [arch=amd64 trusted=yes] $ROCM_DEB_REPO $ROCM_BUILD_NAME $ROCM_BUILD_NUM" > /etc/apt/sources.list.d/rocm.list ; \ -+ fi' - - # Install misc pkgs - RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \ - -From f5a822d2012bc3e1cea1de97ff8189404688f84e Mon Sep 17 00:00:00 2001 -From: Deven Desai <deven.desai.amd@gmail.com> -Date: Wed, 12 Aug 2020 15:51:34 +0000 -Subject: [PATCH 6/8] Updating TF to acccount for the (ROCm 3.7) change in - hipDeviceGetStreamPriorityRange - -Starting with ROCm 3.7, the `hipDeviceGetStreamPriorityRange` API returns a range of `[-1,1]`. -This is a departure from the `[0,2]` range that was returned by this API in ROCm 3.3 and prior. - -Updating the TF unit test, that has checks based on the range returned by this API, to account for change in the returned range ---- - .../common_runtime/gpu/gpu_device_test.cc | 34 +++++-------------- - 1 file changed, 8 insertions(+), 26 deletions(-) - -diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc -index 6448fc56af7a1..21c75244b5feb 100644 ---- a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc -+++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc -@@ -230,9 +230,9 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndNoPriority) { - TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) { - { - #if TENSORFLOW_USE_ROCM -- // Priority outside the range (0, 2) for AMD GPUs -+ // Priority outside the range (-1, 1) for AMD GPUs - SessionOptions opts = -- MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 2}}); -+ MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-2, 1}}); - #else - // Priority outside the range (-2, 0) for NVidia GPUs - SessionOptions opts = -@@ -245,7 +245,7 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) { - #if TENSORFLOW_USE_ROCM - ExpectErrorMessageSubstr( - status, -- "Priority -1 is outside the range of supported priorities [0,2] for" -+ "Priority -2 is outside the range of supported priorities [-1,1] for" - " virtual device 0 on GPU# 0"); - #else - ExpectErrorMessageSubstr( -@@ -254,8 +254,8 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) { - } - { - #if TENSORFLOW_USE_ROCM -- // Priority outside the range (0, 2) for AMD GPUs -- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 3}}); -+ // Priority outside the range (-1, 1) for AMD GPUs -+ SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 2}}); - #else - // Priority outside the range (-2, 0) for NVidia GPUs - SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}}); -@@ -267,7 +267,7 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) { - #if TENSORFLOW_USE_ROCM - ExpectErrorMessageSubstr( - status, -- "Priority 3 is outside the range of supported priorities [0,2] for" -+ "Priority 2 is outside the range of supported priorities [-1,1] for" - " virtual device 0 on GPU# 0"); - #else - ExpectErrorMessageSubstr( -@@ -288,26 +288,17 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndPriority) { - } - - TEST_F(GPUDeviceTest, MultipleVirtualDevices) { --#if TENSORFLOW_USE_ROCM -- // Valid range for priority values on AMD GPUs in (0,2) -- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}}); --#else -+ // Valid range for priority values on AMD GPUs in (-1,1) - // Valid range for priority values on NVidia GPUs in (-2, 0) - SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, -1}}); --#endif - std::vector<std::unique_ptr<Device>> devices; - TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices( - opts, kDeviceNamePrefix, &devices)); - EXPECT_EQ(2, devices.size()); - EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit()); - EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit()); --#if TENSORFLOW_USE_ROCM -- EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority()); -- EXPECT_EQ(1, static_cast<BaseGPUDevice*>(devices[1].get())->priority()); --#else - EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority()); - EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[1].get())->priority()); --#endif - ASSERT_EQ(1, devices[0]->attributes().locality().links().link_size()); - ASSERT_EQ(1, devices[1]->attributes().locality().links().link_size()); - EXPECT_EQ(1, devices[0]->attributes().locality().links().link(0).device_id()); -@@ -339,27 +330,18 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevicesWithPriority) { - } - { - // Multile virtual devices with matching priority. --#if TENSORFLOW_USE_ROCM -- // Valid range for priority values on AMD GPUs in (0,2) -- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{2, 1}}); --#else -+ // Valid range for priority values on AMD GPUs in (-1,1) - // Valid range for priority values on NVidia GPUs in (-2, 0) - SessionOptions opts = - MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 0}}); --#endif - std::vector<std::unique_ptr<Device>> devices; - TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices( - opts, kDeviceNamePrefix, &devices)); - EXPECT_EQ(2, devices.size()); - EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit()); - EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit()); --#if TENSORFLOW_USE_ROCM -- EXPECT_EQ(2, static_cast<BaseGPUDevice*>(devices[0].get())->priority()); -- EXPECT_EQ(1, static_cast<BaseGPUDevice*>(devices[1].get())->priority()); --#else - EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[0].get())->priority()); - EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[1].get())->priority()); --#endif - } - } - - -From ae9e3bd2fb8c3e042742b8c534c9020732c2c66d Mon Sep 17 00:00:00 2001 -From: Deven Desai <deven.desai.amd@gmail.com> -Date: Wed, 12 Aug 2020 23:05:32 +0000 -Subject: [PATCH 7/8] Commeting out subtests that are failing due to JIRA - ticket 236756, and also removing the no_rocm tag from the tests that contain - those subtests - ---- - tensorflow/python/ops/parallel_for/math_test.py | 5 +++++ - tensorflow/python/ops/ragged/ragged_dispatch_test.py | 5 +++++ - 2 files changed, 10 insertions(+) - -diff --git a/tensorflow/python/ops/parallel_for/math_test.py b/tensorflow/python/ops/parallel_for/math_test.py -index 933ce765cdbfa..367f40d341115 100644 ---- a/tensorflow/python/ops/parallel_for/math_test.py -+++ b/tensorflow/python/ops/parallel_for/math_test.py -@@ -82,6 +82,11 @@ def test_unary_cwise_complex_ops(self): - self._test_unary_cwise_ops(complex_ops, True) - - def test_unary_cwise_real_ops_1(self): -+ if test.is_built_with_rocm(): -+ # TODO(rocm): -+ # This fails on ROCm...see JIRA ticket 236756 -+ self.skipTest('Fails on ROCM') -+ - real_ops = [ - lambda x: math_ops.acosh(1 + math_ops.square(x)), - math_ops.abs, -diff --git a/tensorflow/python/ops/ragged/ragged_dispatch_test.py b/tensorflow/python/ops/ragged/ragged_dispatch_test.py -index 0237624aa451d..7a1d7c1882af1 100644 ---- a/tensorflow/python/ops/ragged/ragged_dispatch_test.py -+++ b/tensorflow/python/ops/ragged/ragged_dispatch_test.py -@@ -139,6 +139,11 @@ def assertSameShape(self, x, y): - ] - ) # pyformat: disable - def testUnaryElementwiseOp(self, x, op=math_ops.abs, **extra_args): -+ if test_util.IsBuiltWithROCm(): -+ # TODO(rocm): -+ # This fails on ROCm...see JIRA ticket 236756 -+ self.skipTest('Fails on ROCM') -+ - result = op(x, **extra_args) - - # Run the wrapped op on the dense values, for comparison. - -From d4b8e68a3675bfb2d7465205420bd5ad15701d0b Mon Sep 17 00:00:00 2001 -From: Deven Desai <deven.desai.amd@gmail.com> -Date: Wed, 26 Aug 2020 22:01:18 +0000 -Subject: [PATCH 8/8] Adding no_rocm tag to unit-tests that will not pass with - ROCm 3.7 until PR #42288 gets merged - ---- - tensorflow/python/BUILD | 1 + - tensorflow/python/keras/optimizer_v2/BUILD | 2 ++ - 2 files changed, 3 insertions(+) - -diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD -index a111237e0565d..5252ebbed6e4b 100644 ---- a/tensorflow/python/BUILD -+++ b/tensorflow/python/BUILD -@@ -5423,6 +5423,7 @@ cuda_py_test( - python_version = "PY3", - shard_count = 10, - tags = [ -+ "no_rocm", - "no_windows_gpu", - "noasan", # b/159332048 - "nomsan", # b/148630708 -diff --git a/tensorflow/python/keras/optimizer_v2/BUILD b/tensorflow/python/keras/optimizer_v2/BUILD -index b208e2e1e1e6b..11966ce8211d2 100644 ---- a/tensorflow/python/keras/optimizer_v2/BUILD -+++ b/tensorflow/python/keras/optimizer_v2/BUILD -@@ -157,6 +157,7 @@ cuda_py_test( - size = "medium", - srcs = ["adadelta_test.py"], - shard_count = 4, -+ tags = ["no_rocm"], - deps = [ - ":optimizer_v2", - "//tensorflow/python:client_testlib", -@@ -298,6 +299,7 @@ cuda_py_test( - size = "medium", - srcs = ["rmsprop_test.py"], - shard_count = 2, -+ tags = ["no_rocm"], - deps = [ - ":optimizer_v2", - "//tensorflow/python:array_ops", |