aboutsummarylogtreecommitdiffstats
path: root/new-rocm.patch
diff options
context:
space:
mode:
Diffstat (limited to 'new-rocm.patch')
-rw-r--r--new-rocm.patch692
1 files changed, 0 insertions, 692 deletions
diff --git a/new-rocm.patch b/new-rocm.patch
deleted file mode 100644
index 01eb2b4fab8c..000000000000
--- a/new-rocm.patch
+++ /dev/null
@@ -1,692 +0,0 @@
-From fcc2de09eb38f45b678a5457f594ca594f2572c9 Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Thu, 16 Jul 2020 19:38:03 +0000
-Subject: [PATCH 1/8] Change references to libhip_hcc.so to refer to
- libamdhip64.so instead
-
-With the switch to the new hipclang-vdi runtime (in ROCm 3.5), the new name for the HIP runtime library is libamdhip64.so.
-
-For backwards compatibility, ROCm 3.5 and ROCm 3.6 include a "libhip_hcc.so" softlink, which points to libamdhip64.so. That softlink will be going away starting with ROCm 3.7(?).
-
-This commit updates references to libhip_hcc.so (in the TF build) to use libamdhip64.so instead.
-
-See following JIRA tickets for further details:
-
-* http://ontrack-internal.amd.com/browse/SWDEV-244762
-* http://ontrack-internal.amd.com/browse/SWDEV-238533
----
- tensorflow/stream_executor/platform/default/dso_loader.cc | 2 +-
- .../crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl | 7 -------
- third_party/gpus/rocm_configure.bzl | 8 +++-----
- 3 files changed, 4 insertions(+), 13 deletions(-)
-
-diff --git a/tensorflow/stream_executor/platform/default/dso_loader.cc b/tensorflow/stream_executor/platform/default/dso_loader.cc
-index 70b1ebe070a76..84293b7767a20 100644
---- a/tensorflow/stream_executor/platform/default/dso_loader.cc
-+++ b/tensorflow/stream_executor/platform/default/dso_loader.cc
-@@ -140,7 +140,7 @@ port::StatusOr<void*> GetHipsparseDsoHandle() {
- return GetDsoHandle("hipsparse", "");
- }
-
--port::StatusOr<void*> GetHipDsoHandle() { return GetDsoHandle("hip_hcc", ""); }
-+port::StatusOr<void*> GetHipDsoHandle() { return GetDsoHandle("amdhip64", ""); }
-
- } // namespace DsoLoader
-
-diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
-index 8848bd32c2e1d..d5bfe78c6449d 100755
---- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
-+++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
-@@ -34,8 +34,6 @@ HIPCC_ENV = '%{hipcc_env}'
- HIPCC_IS_HIPCLANG = '%{hipcc_is_hipclang}'=="True"
- HIP_RUNTIME_PATH = '%{hip_runtime_path}'
- HIP_RUNTIME_LIBRARY = '%{hip_runtime_library}'
--HCC_RUNTIME_PATH = '%{hcc_runtime_path}'
--HCC_RUNTIME_LIBRARY = '%{hcc_runtime_library}'
- ROCR_RUNTIME_PATH = '%{rocr_runtime_path}'
- ROCR_RUNTIME_LIBRARY = '%{rocr_runtime_library}'
- VERBOSE = '%{crosstool_verbose}'=='1'
-@@ -267,11 +265,6 @@ def main():
- gpu_linker_flags.append('-L' + ROCR_RUNTIME_PATH)
- gpu_linker_flags.append('-Wl,-rpath=' + ROCR_RUNTIME_PATH)
- gpu_linker_flags.append('-l' + ROCR_RUNTIME_LIBRARY)
-- # do not link with HCC runtime library in case hip-clang toolchain is used
-- if not HIPCC_IS_HIPCLANG:
-- gpu_linker_flags.append('-L' + HCC_RUNTIME_PATH)
-- gpu_linker_flags.append('-Wl,-rpath=' + HCC_RUNTIME_PATH)
-- gpu_linker_flags.append('-l' + HCC_RUNTIME_LIBRARY)
- gpu_linker_flags.append('-L' + HIP_RUNTIME_PATH)
- gpu_linker_flags.append('-Wl,-rpath=' + HIP_RUNTIME_PATH)
- gpu_linker_flags.append('-l' + HIP_RUNTIME_LIBRARY)
-diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl
-index 1312574f0aa46..0508279518894 100644
---- a/third_party/gpus/rocm_configure.bzl
-+++ b/third_party/gpus/rocm_configure.bzl
-@@ -390,7 +390,7 @@ def _find_libs(repository_ctx, rocm_config, bash_bin):
- libs_paths = [
- (name, _rocm_lib_paths(repository_ctx, name, path))
- for name, path in [
-- ("hip_hcc", rocm_config.rocm_toolkit_path + "/hip"),
-+ ("amdhip64", rocm_config.rocm_toolkit_path + "/hip"),
- ("rocblas", rocm_config.rocm_toolkit_path + "/rocblas"),
- ("rocfft", rocm_config.rocm_toolkit_path + "/rocfft"),
- ("hiprand", rocm_config.rocm_toolkit_path + "/hiprand"),
-@@ -646,7 +646,7 @@ def _create_local_rocm_repository(repository_ctx):
- "rocm/BUILD",
- tpl_paths["rocm:BUILD"],
- {
-- "%{hip_lib}": rocm_libs["hip_hcc"].file_name,
-+ "%{hip_lib}": rocm_libs["amdhip64"].file_name,
- "%{rocblas_lib}": rocm_libs["rocblas"].file_name,
- "%{rocfft_lib}": rocm_libs["rocfft"].file_name,
- "%{hiprand_lib}": rocm_libs["hiprand"].file_name,
-@@ -733,9 +733,7 @@ def _create_local_rocm_repository(repository_ctx):
- "%{rocr_runtime_path}": rocm_config.rocm_toolkit_path + "/lib",
- "%{rocr_runtime_library}": "hsa-runtime64",
- "%{hip_runtime_path}": rocm_config.rocm_toolkit_path + "/hip/lib",
-- "%{hip_runtime_library}": "hip_hcc",
-- "%{hcc_runtime_path}": rocm_config.rocm_toolkit_path + "/hcc/lib",
-- "%{hcc_runtime_library}": "mcwamp",
-+ "%{hip_runtime_library}": "amdhip64",
- "%{crosstool_verbose}": _crosstool_verbose(repository_ctx),
- "%{gcc_host_compiler_path}": str(cc),
- },
-
-From 77fb7fd1c68f81c416fd909b6677277b3637be05 Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Fri, 17 Jul 2020 01:04:58 +0000
-Subject: [PATCH 2/8] Removing references to `*StaticCompiledGEMM` from TF code
-
-This commit is in conjunction with this MIOpen PR which removes scgemm from MIOpen
-https://github.com/ROCmSoftwarePlatform/MIOpen/pull/325
-
-The MIOpen release that includes that change will be included in the next ROCm release.
-This commit removes references to `*StaticCompiledGEMM` from TF code to prepare for switching to the next ROCm release (3.7)
----
- tensorflow/stream_executor/rocm/rocm_dnn.cc | 6 ------
- 1 file changed, 6 deletions(-)
-
-diff --git a/tensorflow/stream_executor/rocm/rocm_dnn.cc b/tensorflow/stream_executor/rocm/rocm_dnn.cc
-index 80306105d4adf..4c5a740dfb090 100644
---- a/tensorflow/stream_executor/rocm/rocm_dnn.cc
-+++ b/tensorflow/stream_executor/rocm/rocm_dnn.cc
-@@ -113,9 +113,6 @@ string ToString(miopenConvFwdAlgorithm_t algorithm) {
- case miopenConvolutionFwdAlgoImplicitGEMM:
- s = "Implicit GEMM";
- break;
-- case miopenConvolutionFwdAlgoStaticCompiledGEMM:
-- s = "Static Compiled GEMM";
-- break;
- }
- return s;
- }
-@@ -182,9 +179,6 @@ string ToString(miopenConvAlgorithm_t algorithm) {
- case miopenConvolutionAlgoImplicitGEMM:
- s = "Implicit GEMM";
- break;
-- case miopenConvolutionAlgoStaticCompiledGEMM:
-- s = "Static Compiled GEMM";
-- break;
- }
- return s;
- }
-
-From 566d2a95c6140322241bce20fcfea952e837fda1 Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Tue, 11 Aug 2020 02:09:46 +0000
-Subject: [PATCH 3/8] Reverting "Provide ldexp float overload for HIP, it's
- missing in their headers. "
-
----
- tensorflow/core/kernels/cwise_ops_gpu_common.cu.h | 6 ------
- tensorflow/core/kernels/rnn/blas_gemm.h | 5 -----
- 2 files changed, 11 deletions(-)
-
-diff --git a/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h b/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h
-index 8849c3f4eddbb..ecc58da315f6b 100644
---- a/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h
-+++ b/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h
-@@ -30,12 +30,6 @@ limitations under the License.
- #include "tensorflow/core/platform/types.h"
-
- #include "tensorflow/core/platform/logging.h"
--
--#ifdef __HIP_DEVICE_COMPILE__
--// Provide ldexp float overload for HIP, it's missing in their headers.
--__device__ inline float ldexp(float x, int exp) { return ldexpf(x, exp); }
--#endif
--
- namespace tensorflow {
- namespace functor {
-
-diff --git a/tensorflow/core/kernels/rnn/blas_gemm.h b/tensorflow/core/kernels/rnn/blas_gemm.h
-index 74f4cd2bb39a4..126e1edef17a9 100644
---- a/tensorflow/core/kernels/rnn/blas_gemm.h
-+++ b/tensorflow/core/kernels/rnn/blas_gemm.h
-@@ -25,11 +25,6 @@ limitations under the License.
- #include "tensorflow/core/kernels/eigen_contraction_kernel.h"
- #endif
-
--#ifdef __HIP_DEVICE_COMPILE__
--// Provide ldexp float overload for HIP, it's missing in their headers.
--__device__ inline float ldexp(float x, int exp) { return ldexpf(x, exp); }
--#endif
--
- namespace tensorflow {
- class OpKernelContext;
- namespace functor {
-
-From 9dcaad456e194bf8d1e3962cd6ad272f4879d7f3 Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Wed, 12 Aug 2020 00:39:02 +0000
-Subject: [PATCH 4/8] updating ROCM CI scripts to use ROCm 3.7
-
----
- .../tools/ci_build/linux/rocm/run_cc_core.sh | 34 +++++++++++++------
- .../ci_build/linux/rocm/run_csb_tests.sh | 27 ++++++++++-----
- .../tools/ci_build/linux/rocm/run_py3_core.sh | 23 +++++++++----
- .../tools/ci_build/xla/linux/rocm/run_py3.sh | 33 ++++++++++++------
- 4 files changed, 79 insertions(+), 38 deletions(-)
-
-diff --git a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
-index 1f4a36f8de0f5..92d21cb133be9 100755
---- a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
-+++ b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
-@@ -18,20 +18,27 @@
- set -e
- set -x
-
--N_JOBS=$(grep -c ^processor /proc/cpuinfo)
--N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
-+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+TF_TESTS_PER_GPU=1
-+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})
-
- echo ""
--echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
-+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
- echo ""
-
-+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
-+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
-+if [[ -n $1 ]]; then
-+ ROCM_INSTALL_DIR=$1
-+fi
-+
- # Run configure.
- export PYTHON_BIN_PATH=`which python3`
- export CC_OPT_FLAGS='-mavx'
-
- export TF_NEED_ROCM=1
--export ROCM_PATH=/opt/rocm-3.3.0
--export TF_GPU_COUNT=${N_GPUS}
-+export ROCM_PATH=$ROCM_INSTALL_DIR
-
- yes "" | $PYTHON_BIN_PATH configure.py
-
-@@ -39,15 +46,17 @@ yes "" | $PYTHON_BIN_PATH configure.py
- bazel test \
- --config=rocm \
- -k \
-- --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-+ --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-multi_gpu,-v1only \
- --test_lang_filters=cc \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=${TF_GPU_COUNT}\
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
-+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
-+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
- --test_timeout 600,900,2400,7200 \
- --build_tests_only \
- --test_output=errors \
- --test_sharding_strategy=disabled \
-- --test_size_filters=small,medium \
-+ --test_size_filters=small,medium,large \
- --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
- -- \
- //tensorflow/... \
-@@ -59,11 +68,14 @@ bazel test \
- --config=rocm \
- -k \
- --test_tag_filters=gpu \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=1 \
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
-+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
-+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
- --test_timeout 600,900,2400,7200 \
- --build_tests_only \
- --test_output=errors \
- --test_sharding_strategy=disabled \
-+ --test_size_filters=small,medium,large \
- -- \
- //tensorflow/core/nccl:nccl_manager_test
-diff --git a/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh b/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh
-index 4962b2789b1c0..80c0686e64724 100755
---- a/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh
-+++ b/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh
-@@ -18,20 +18,27 @@
- set -e
- set -x
-
--N_JOBS=$(grep -c ^processor /proc/cpuinfo)
--N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
-+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+TF_TESTS_PER_GPU=1
-+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})
-
- echo ""
--echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
-+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
- echo ""
-
-+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
-+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
-+if [[ -n $1 ]]; then
-+ ROCM_INSTALL_DIR=$1
-+fi
-+
- # Run configure.
- export PYTHON_BIN_PATH=`which python3`
- export CC_OPT_FLAGS='-mavx'
-
- export TF_NEED_ROCM=1
--export ROCM_PATH=/opt/rocm-3.3.0
--export TF_GPU_COUNT=${N_GPUS}
-+export ROCM_PATH=$ROCM_INSTALL_DIR
-
- yes "" | $PYTHON_BIN_PATH configure.py
-
-@@ -40,8 +47,10 @@ bazel test \
- --config=rocm \
- -k \
- --test_tag_filters=gpu,-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=${TF_GPU_COUNT} \
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
-+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
-+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
- --test_timeout 600,900,2400,7200 \
- --test_output=errors \
- --test_sharding_strategy=disabled \
-@@ -60,8 +69,8 @@ bazel test \
- --test_tag_filters=gpu \
- --test_timeout 600,900,2400,7200 \
- --test_output=errors \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=1 \
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
- --test_sharding_strategy=disabled \
- -- \
- //tensorflow/core/nccl:nccl_manager_test
-diff --git a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
-index 7ea866f8e2032..3a09081dd6ac6 100755
---- a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
-+++ b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
-@@ -18,20 +18,27 @@
- set -e
- set -x
-
--N_JOBS=$(grep -c ^processor /proc/cpuinfo)
--N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
-+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+TF_TESTS_PER_GPU=1
-+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})
-
- echo ""
--echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
-+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
- echo ""
-
-+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
-+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
-+if [[ -n $1 ]]; then
-+ ROCM_INSTALL_DIR=$1
-+fi
-+
- # Run configure.
- export PYTHON_BIN_PATH=`which python3`
- export CC_OPT_FLAGS='-mavx'
-
- export TF_NEED_ROCM=1
--export ROCM_PATH=/opt/rocm-3.3.0
--export TF_GPU_COUNT=${N_GPUS}
-+export ROCM_PATH=$ROCM_INSTALL_DIR
-
- yes "" | $PYTHON_BIN_PATH configure.py
-
-@@ -41,8 +48,10 @@ bazel test \
- -k \
- --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
- --test_lang_filters=py \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=${TF_GPU_COUNT} \
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
-+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
-+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
- --test_timeout 600,900,2400,7200 \
- --build_tests_only \
- --test_output=errors \
-diff --git a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
-index 6ce1fad9cc754..d623b77d5333d 100755
---- a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
-+++ b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
-@@ -18,20 +18,27 @@
- set -e
- set -x
-
--N_JOBS=$(grep -c ^processor /proc/cpuinfo)
--N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
-+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
-+TF_TESTS_PER_GPU=1
-+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})
-
- echo ""
--echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
-+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
- echo ""
-
-+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
-+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
-+if [[ -n $1 ]]; then
-+ ROCM_INSTALL_DIR=$1
-+fi
-+
- # Run configure.
- export PYTHON_BIN_PATH=`which python3`
- export CC_OPT_FLAGS='-mavx'
-
- export TF_NEED_ROCM=1
--export ROCM_PATH=/opt/rocm-3.3.0
--export TF_GPU_COUNT=${N_GPUS}
-+export ROCM_PATH=$ROCM_INSTALL_DIR
-
- yes "" | $PYTHON_BIN_PATH configure.py
- echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc
-@@ -41,9 +48,11 @@ bazel test \
- --config=rocm \
- --config=xla \
- -k \
-- --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=${TF_GPU_COUNT} \
-+ --test_tag_filters=-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
-+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
-+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
- --test_timeout 600,900,2400,7200 \
- --build_tests_only \
- --test_output=errors \
-@@ -65,9 +74,11 @@ bazel test \
- --config=rocm \
- --config=xla \
- -k \
-- --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-- --jobs=${N_JOBS} \
-- --local_test_jobs=${TF_GPU_COUNT} \
-+ --test_tag_filters=-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-+ --jobs=${N_BUILD_JOBS} \
-+ --local_test_jobs=${N_TEST_JOBS} \
-+ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
-+ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
- --test_timeout 600,900,2400,7200 \
- --build_tests_only \
- --test_output=errors \
-
-From 4b76a49a1a5741dece6d368b30f7125e20c12878 Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Wed, 26 Aug 2020 15:21:31 +0000
-Subject: [PATCH 5/8] Updating Dockerfile.rocm to use ROCm 3.7
-
----
- tensorflow/tools/ci_build/Dockerfile.rocm | 14 ++++++++++----
- 1 file changed, 10 insertions(+), 4 deletions(-)
-
-diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm
-index 4f5d3ae7291b1..d209173258ada 100644
---- a/tensorflow/tools/ci_build/Dockerfile.rocm
-+++ b/tensorflow/tools/ci_build/Dockerfile.rocm
-@@ -3,8 +3,10 @@
- FROM ubuntu:bionic
- MAINTAINER Jeff Poznanovic <jeffrey.poznanovic@amd.com>
-
--ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/3.3/
--ARG ROCM_PATH=/opt/rocm-3.3.0
-+ARG ROCM_DEB_REPO=http://repo.radeon.com/rocm/apt/3.7/
-+ARG ROCM_BUILD_NAME=xenial
-+ARG ROCM_BUILD_NUM=main
-+ARG ROCM_PATH=/opt/rocm-3.7.0
-
- ENV DEBIAN_FRONTEND noninteractive
- ENV TF_NEED_ROCM 1
-@@ -13,8 +15,12 @@ RUN apt update && apt install -y wget software-properties-common
-
- # Add rocm repository
- RUN apt-get clean all
--RUN wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add -
--RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources.list.d/rocm.list"
-+RUN bin/bash -c 'if [[ $ROCM_DEB_REPO == http://repo.radeon.com/rocm/* ]] ; then \
-+ wget -qO - $ROCM_DEB_REPO/rocm.gpg.key | apt-key add -; \
-+ echo "deb [arch=amd64] $ROCM_DEB_REPO $ROCM_BUILD_NAME $ROCM_BUILD_NUM" > /etc/apt/sources.list.d/rocm.list; \
-+ else \
-+ echo "deb [arch=amd64 trusted=yes] $ROCM_DEB_REPO $ROCM_BUILD_NAME $ROCM_BUILD_NUM" > /etc/apt/sources.list.d/rocm.list ; \
-+ fi'
-
- # Install misc pkgs
- RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \
-
-From f5a822d2012bc3e1cea1de97ff8189404688f84e Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Wed, 12 Aug 2020 15:51:34 +0000
-Subject: [PATCH 6/8] Updating TF to account for the (ROCm 3.7) change in
- hipDeviceGetStreamPriorityRange
-
-Starting with ROCm 3.7, the `hipDeviceGetStreamPriorityRange` API returns a range of `[-1,1]`.
-This is a departure from the `[0,2]` range that was returned by this API in ROCm 3.3 and prior.
-
-Updating the TF unit test, which has checks based on the range returned by this API, to account for the change in the returned range.
----
- .../common_runtime/gpu/gpu_device_test.cc | 34 +++++--------------
- 1 file changed, 8 insertions(+), 26 deletions(-)
-
-diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
-index 6448fc56af7a1..21c75244b5feb 100644
---- a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
-+++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
-@@ -230,9 +230,9 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndNoPriority) {
- TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
- {
- #if TENSORFLOW_USE_ROCM
-- // Priority outside the range (0, 2) for AMD GPUs
-+ // Priority outside the range (-1, 1) for AMD GPUs
- SessionOptions opts =
-- MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 2}});
-+ MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-2, 1}});
- #else
- // Priority outside the range (-2, 0) for NVidia GPUs
- SessionOptions opts =
-@@ -245,7 +245,7 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
- #if TENSORFLOW_USE_ROCM
- ExpectErrorMessageSubstr(
- status,
-- "Priority -1 is outside the range of supported priorities [0,2] for"
-+ "Priority -2 is outside the range of supported priorities [-1,1] for"
- " virtual device 0 on GPU# 0");
- #else
- ExpectErrorMessageSubstr(
-@@ -254,8 +254,8 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
- }
- {
- #if TENSORFLOW_USE_ROCM
-- // Priority outside the range (0, 2) for AMD GPUs
-- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 3}});
-+ // Priority outside the range (-1, 1) for AMD GPUs
-+ SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 2}});
- #else
- // Priority outside the range (-2, 0) for NVidia GPUs
- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}});
-@@ -267,7 +267,7 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
- #if TENSORFLOW_USE_ROCM
- ExpectErrorMessageSubstr(
- status,
-- "Priority 3 is outside the range of supported priorities [0,2] for"
-+ "Priority 2 is outside the range of supported priorities [-1,1] for"
- " virtual device 0 on GPU# 0");
- #else
- ExpectErrorMessageSubstr(
-@@ -288,26 +288,17 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndPriority) {
- }
-
- TEST_F(GPUDeviceTest, MultipleVirtualDevices) {
--#if TENSORFLOW_USE_ROCM
-- // Valid range for priority values on AMD GPUs in (0,2)
-- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}});
--#else
-+ // Valid range for priority values on AMD GPUs in (-1,1)
- // Valid range for priority values on NVidia GPUs in (-2, 0)
- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, -1}});
--#endif
- std::vector<std::unique_ptr<Device>> devices;
- TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
- opts, kDeviceNamePrefix, &devices));
- EXPECT_EQ(2, devices.size());
- EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
- EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
--#if TENSORFLOW_USE_ROCM
-- EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
-- EXPECT_EQ(1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
--#else
- EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
- EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
--#endif
- ASSERT_EQ(1, devices[0]->attributes().locality().links().link_size());
- ASSERT_EQ(1, devices[1]->attributes().locality().links().link_size());
- EXPECT_EQ(1, devices[0]->attributes().locality().links().link(0).device_id());
-@@ -339,27 +330,18 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevicesWithPriority) {
- }
- {
- // Multile virtual devices with matching priority.
--#if TENSORFLOW_USE_ROCM
-- // Valid range for priority values on AMD GPUs in (0,2)
-- SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{2, 1}});
--#else
-+ // Valid range for priority values on AMD GPUs in (-1,1)
- // Valid range for priority values on NVidia GPUs in (-2, 0)
- SessionOptions opts =
- MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 0}});
--#endif
- std::vector<std::unique_ptr<Device>> devices;
- TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
- opts, kDeviceNamePrefix, &devices));
- EXPECT_EQ(2, devices.size());
- EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
- EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
--#if TENSORFLOW_USE_ROCM
-- EXPECT_EQ(2, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
-- EXPECT_EQ(1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
--#else
- EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
- EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
--#endif
- }
- }
-
-
-From ae9e3bd2fb8c3e042742b8c534c9020732c2c66d Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Wed, 12 Aug 2020 23:05:32 +0000
-Subject: [PATCH 7/8] Commenting out subtests that are failing due to JIRA
- ticket 236756, and also removing the no_rocm tag from the tests that contain
- those subtests
-
----
- tensorflow/python/ops/parallel_for/math_test.py | 5 +++++
- tensorflow/python/ops/ragged/ragged_dispatch_test.py | 5 +++++
- 2 files changed, 10 insertions(+)
-
-diff --git a/tensorflow/python/ops/parallel_for/math_test.py b/tensorflow/python/ops/parallel_for/math_test.py
-index 933ce765cdbfa..367f40d341115 100644
---- a/tensorflow/python/ops/parallel_for/math_test.py
-+++ b/tensorflow/python/ops/parallel_for/math_test.py
-@@ -82,6 +82,11 @@ def test_unary_cwise_complex_ops(self):
- self._test_unary_cwise_ops(complex_ops, True)
-
- def test_unary_cwise_real_ops_1(self):
-+ if test.is_built_with_rocm():
-+ # TODO(rocm):
-+ # This fails on ROCm...see JIRA ticket 236756
-+ self.skipTest('Fails on ROCM')
-+
- real_ops = [
- lambda x: math_ops.acosh(1 + math_ops.square(x)),
- math_ops.abs,
-diff --git a/tensorflow/python/ops/ragged/ragged_dispatch_test.py b/tensorflow/python/ops/ragged/ragged_dispatch_test.py
-index 0237624aa451d..7a1d7c1882af1 100644
---- a/tensorflow/python/ops/ragged/ragged_dispatch_test.py
-+++ b/tensorflow/python/ops/ragged/ragged_dispatch_test.py
-@@ -139,6 +139,11 @@ def assertSameShape(self, x, y):
- ]
- ) # pyformat: disable
- def testUnaryElementwiseOp(self, x, op=math_ops.abs, **extra_args):
-+ if test_util.IsBuiltWithROCm():
-+ # TODO(rocm):
-+ # This fails on ROCm...see JIRA ticket 236756
-+ self.skipTest('Fails on ROCM')
-+
- result = op(x, **extra_args)
-
- # Run the wrapped op on the dense values, for comparison.
-
-From d4b8e68a3675bfb2d7465205420bd5ad15701d0b Mon Sep 17 00:00:00 2001
-From: Deven Desai <deven.desai.amd@gmail.com>
-Date: Wed, 26 Aug 2020 22:01:18 +0000
-Subject: [PATCH 8/8] Adding no_rocm tag to unit-tests that will not pass with
- ROCm 3.7 until PR #42288 gets merged
-
----
- tensorflow/python/BUILD | 1 +
- tensorflow/python/keras/optimizer_v2/BUILD | 2 ++
- 2 files changed, 3 insertions(+)
-
-diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
-index a111237e0565d..5252ebbed6e4b 100644
---- a/tensorflow/python/BUILD
-+++ b/tensorflow/python/BUILD
-@@ -5423,6 +5423,7 @@ cuda_py_test(
- python_version = "PY3",
- shard_count = 10,
- tags = [
-+ "no_rocm",
- "no_windows_gpu",
- "noasan", # b/159332048
- "nomsan", # b/148630708
-diff --git a/tensorflow/python/keras/optimizer_v2/BUILD b/tensorflow/python/keras/optimizer_v2/BUILD
-index b208e2e1e1e6b..11966ce8211d2 100644
---- a/tensorflow/python/keras/optimizer_v2/BUILD
-+++ b/tensorflow/python/keras/optimizer_v2/BUILD
-@@ -157,6 +157,7 @@ cuda_py_test(
- size = "medium",
- srcs = ["adadelta_test.py"],
- shard_count = 4,
-+ tags = ["no_rocm"],
- deps = [
- ":optimizer_v2",
- "//tensorflow/python:client_testlib",
-@@ -298,6 +299,7 @@ cuda_py_test(
- size = "medium",
- srcs = ["rmsprop_test.py"],
- shard_count = 2,
-+ tags = ["no_rocm"],
- deps = [
- ":optimizer_v2",
- "//tensorflow/python:array_ops",