author      acxz    2020-08-13 15:00:11 -0400
committer   acxz    2020-08-13 15:00:11 -0400
commit      f4e53a4adb9babe64752698a0829b1de92878ca4 (patch)
tree        7be58115343bd64c5b9df84bd19f4793475081b6
parent      8810f407652894d1a8d49dc138ddc00bd1659a0e (diff)
download    aur-f4e53a4adb9babe64752698a0829b1de92878ca4.tar.gz

add patch to fix undefined hipOccupancyBlock

-rw-r--r--  .SRCINFO                    4
-rw-r--r--  PKGBUILD                   18
-rw-r--r--  fix_occupancy_block.patch  87
3 files changed, 102 insertions, 7 deletions
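
Note: the vendored patch below switches TensorFlow to the ROCm 3.5+ form of
hipOccupancyMaxPotentialBlockSize, which returns hipError_t and writes int
out-parameters; earlier HIP releases returned void and used uint32_t, which is
what the removed conversion code worked around. A minimal standalone sketch of
that call, assuming ROCm >= 3.5 headers (the kernel and variable names here are
illustrative, not part of the commit):

    #include <hip/hip_runtime.h>
    #include <cstdio>

    __global__ void fill_one(float* out) {
      out[blockIdx.x * blockDim.x + threadIdx.x] = 1.0f;
    }

    int main() {
      int block_count = 0;       // minimum grid size for full occupancy
      int thread_per_block = 0;  // suggested threads per block
      // 0 bytes of dynamic shared memory, no upper limit on block size.
      hipError_t err = hipOccupancyMaxPotentialBlockSize(
          &block_count, &thread_per_block, fill_one,
          /*dynSharedMemPerBlk=*/0, /*blockSizeLimit=*/0);
      if (err != hipSuccess) {
        std::fprintf(stderr, "occupancy query failed: %s\n",
                     hipGetErrorString(err));
        return 1;
      }
      std::printf("blocks=%d threads/block=%d\n", block_count,
                  thread_per_block);
      return 0;
    }

Build with hipcc; the call pattern mirrors the patched GetGpuLaunchConfig.
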
diff --git a/.SRCINFO b/.SRCINFO
index aee8f887faa9..b9623c93d0b3 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,7 +1,7 @@
pkgbase = tensorflow-rocm
pkgdesc = Library for computation using data flow graphs for scalable machine learning
pkgver = 2.3.0
- pkgrel = 6
+ pkgrel = 7
url = https://www.tensorflow.org/
arch = x86_64
license = APACHE
@@ -30,6 +30,7 @@ pkgbase = tensorflow-rocm
source = fix_hipcc_path.patch::https://patch-diff.githubusercontent.com/raw/tensorflow/tensorflow/pull/42292.patch
source = fix_gpu_atomic_redef.patch::https://github.com/tensorflow/tensorflow/commit/c054f40f66fa625f51085a20c48554c61d05c5fd.patch
source = fix_ldexp_float.patch::https://github.com/tensorflow/tensorflow/commit/655ce09f679a90ecd561538227c703b42d0fc5fa.patch
+ source = fix_occupancy_block.patch
sha512sums = 86aa087ea84dac1ecc1023b23a378100d41cc6778ccd20404a4b955fc67cef11b3dc08abcc5b88020124d221e6fb172b33bd5206e9c9db6bc8fbeed399917eac
sha512sums = df2e0373e2f63b8766f31933f7db57f6a7559b8f03af1db51644fba87731451a7cd3895529a3192e5394612fcb42f245b794b1c9ca3c05881ca03a547c8c9acc
sha512sums = e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08
@@ -37,6 +38,7 @@ pkgbase = tensorflow-rocm
sha512sums = SKIP
sha512sums = SKIP
sha512sums = SKIP
+ sha512sums = 88c04ed7a766193687d7079102332e3c63d6f0accbda777836abe5e03e9ebb83fd1aeaa9e4adca70310ce18bf3c6c3907f1f8a11c13e67e3ef79497b91bbf126
pkgname = tensorflow-rocm
pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM)
diff --git a/PKGBUILD b/PKGBUILD
index 3449da17d18e..e39cc82a7514 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -8,7 +8,7 @@ pkgbase=tensorflow-rocm
pkgname=(tensorflow-rocm tensorflow-opt-rocm python-tensorflow-rocm python-tensorflow-opt-rocm)
pkgver=2.3.0
_pkgver=2.3.0
-pkgrel=6
+pkgrel=7
pkgdesc="Library for computation using data flow graphs for scalable machine learning"
url="https://www.tensorflow.org/"
license=('APACHE')
@@ -21,10 +21,11 @@ optdepends=('tensorboard: Tensorflow visualization toolkit')
source=("$pkgname-$pkgver.tar.gz::https://github.com/tensorflow/tensorflow/archive/v${_pkgver}.tar.gz"
numpy1.20.patch::https://github.com/tensorflow/tensorflow/commit/75ea0b31477d6ba9e990e296bbbd8ca4e7eebadf.patch
build-against-actual-mkl.patch
- "fix_hip_hcc_path.patch"::"https://github.com/tensorflow/tensorflow/commit/6175b78d8386bd6e5b2beebedb9f40e6b887d5a9.patch"
- "fix_hipcc_path.patch"::"https://patch-diff.githubusercontent.com/raw/tensorflow/tensorflow/pull/42292.patch"
- "fix_gpu_atomic_redef.patch"::"https://github.com/tensorflow/tensorflow/commit/c054f40f66fa625f51085a20c48554c61d05c5fd.patch"
- "fix_ldexp_float.patch"::"https://github.com/tensorflow/tensorflow/commit/655ce09f679a90ecd561538227c703b42d0fc5fa.patch")
+ fix_hip_hcc_path.patch::https://github.com/tensorflow/tensorflow/commit/6175b78d8386bd6e5b2beebedb9f40e6b887d5a9.patch
+ fix_hipcc_path.patch::https://patch-diff.githubusercontent.com/raw/tensorflow/tensorflow/pull/42292.patch
+ fix_gpu_atomic_redef.patch::https://github.com/tensorflow/tensorflow/commit/c054f40f66fa625f51085a20c48554c61d05c5fd.patch
+ fix_ldexp_float.patch::https://github.com/tensorflow/tensorflow/commit/655ce09f679a90ecd561538227c703b42d0fc5fa.patch
+ fix_occupancy_block.patch)
sha512sums=('86aa087ea84dac1ecc1023b23a378100d41cc6778ccd20404a4b955fc67cef11b3dc08abcc5b88020124d221e6fb172b33bd5206e9c9db6bc8fbeed399917eac'
'df2e0373e2f63b8766f31933f7db57f6a7559b8f03af1db51644fba87731451a7cd3895529a3192e5394612fcb42f245b794b1c9ca3c05881ca03a547c8c9acc'
@@ -32,7 +33,8 @@ sha512sums=('86aa087ea84dac1ecc1023b23a378100d41cc6778ccd20404a4b955fc67cef11b3d
'SKIP'
'SKIP'
'SKIP'
- 'SKIP')
+ 'SKIP'
+ '88c04ed7a766193687d7079102332e3c63d6f0accbda777836abe5e03e9ebb83fd1aeaa9e4adca70310ce18bf3c6c3907f1f8a11c13e67e3ef79497b91bbf126')
get_pyver () {
python -c 'import sys; print(str(sys.version_info[0]) + "." + str(sys.version_info[1]))'
@@ -73,6 +75,10 @@ prepare() {
# Fix ldexp float method
patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_ldexp_float.patch
+ # Fix missing hipOccupancyMaxPotentialBlockSize method
+ # https://github.com/tensorflow/tensorflow/commit/22def20bae7be6d5b790b360abed5919385b16c2
+ patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_occupancy_block.patch
+
cp -r tensorflow-${_pkgver} tensorflow-${_pkgver}-rocm
cp -r tensorflow-${_pkgver} tensorflow-${_pkgver}-opt-rocm
diff --git a/fix_occupancy_block.patch b/fix_occupancy_block.patch
new file mode 100644
index 000000000000..137b4e56ea55
--- /dev/null
+++ b/fix_occupancy_block.patch
@@ -0,0 +1,87 @@
+From 22def20bae7be6d5b790b360abed5919385b16c2 Mon Sep 17 00:00:00 2001
+From: Christian Sigg <csigg@google.com>
+Date: Mon, 29 Jun 2020 04:23:28 -0700
+Subject: [PATCH] New ROCm 3.5 RBE docker based on Ubuntu 18.04, re-enable RBE.
+
+Fix list of cxx_builtin_include_directories. Only a few are needed, but those are more complicated (mix of symlinked and real paths).
+
+Properly return error from crosstool wrapper.
+
+PiperOrigin-RevId: 318788040
+Change-Id: Ia66898e98a9a4d8fb479c7e75317f4114f6081e5
+---
+ .bazelrc | 17 ++++
+ tensorflow/core/util/gpu_launch_config.h | 40 ++-------
+ ....local-toolchain-ubuntu18.04-manylinux2010 | 34 ++++++++
+ .../ci_build/Dockerfile.rbe.rocm-ubuntu16.04 | 37 ---------
+ ...rocm-ubuntu18.04-manylinux2010-multipython | 79 ++++++++++++++++++
+ .../bin/crosstool_wrapper_driver_rocm.tpl | 19 ++++-
+ third_party/gpus/rocm_configure.bzl | 83 +++----------------
+ .../preconfig/generate/containers.bzl | 2 +-
+ .../toolchains/remote_config/configs.bzl | 12 +--
+ .../toolchains/remote_config/containers.bzl | 10 ++-
+ 10 files changed, 184 insertions(+), 149 deletions(-)
+ create mode 100644 tensorflow/tools/ci_build/Dockerfile.local-toolchain-ubuntu18.04-manylinux2010
+ delete mode 100644 tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu16.04
+ create mode 100644 tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu18.04-manylinux2010-multipython
+
+diff --git a/tensorflow/core/util/gpu_launch_config.h b/tensorflow/core/util/gpu_launch_config.h
+index 4dfaf333d4bf0..0b943e917da01 100644
+--- a/tensorflow/core/util/gpu_launch_config.h
++++ b/tensorflow/core/util/gpu_launch_config.h
+@@ -168,18 +168,10 @@ GpuLaunchConfig GetGpuLaunchConfig(int work_element_count,
+ block_size_limit);
+ CHECK_EQ(err, cudaSuccess);
+ #elif TENSORFLOW_USE_ROCM
+- // Earlier versions of this HIP routine incorrectly returned void.
+- // TODO re-enable hipError_t error checking when HIP is fixed.
+- // ROCm interface uses unsigned int, convert after checking
+- uint32_t block_count_uint = 0;
+- uint32_t thread_per_block_uint = 0;
+- CHECK_GE(block_size_limit, 0);
+- uint32_t block_size_limit_uint = static_cast<uint32_t>(block_size_limit);
+- hipOccupancyMaxPotentialBlockSize(&block_count_uint, &thread_per_block_uint,
+- func, dynamic_shared_memory_size,
+- block_size_limit_uint);
+- block_count = static_cast<int>(block_count_uint);
+- thread_per_block = static_cast<int>(thread_per_block_uint);
++ hipError_t err = hipOccupancyMaxPotentialBlockSize(
++ &block_count, &thread_per_block, func, dynamic_shared_memory_size,
++ block_size_limit);
++ CHECK_EQ(err, hipSuccess);
+ #endif
+
+ block_count =
+@@ -208,27 +200,13 @@ GpuLaunchConfig GetGpuLaunchConfigFixedBlockSize(
+ cudaError_t err = cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+ &block_count, func, fixed_block_size, dynamic_shared_memory_size);
+ CHECK_EQ(err, cudaSuccess);
+- block_count = std::min(block_count * d.getNumGpuMultiProcessors(),
+- DivUp(work_element_count, fixed_block_size));
+ #elif TENSORFLOW_USE_ROCM
+- // ROCM TODO re-enable this after hipOccupancyMaxActiveBlocksPerMultiprocessor
+- // is implemented
+- // hipError_t err = hipOccupancyMaxActiveBlocksPerMultiprocessor(
+- // &block_count, &thread_per_block, func, dynamic_shared_memory_size,
+- // block_size_limit);
+- // CHECK_EQ(err, hipSuccess);
+-
+- // Apply the heuristic in GetGpuLaunchConfig(int, const Eigen::GpuDevice&)
+- // that the kernel is quite simple and will largely be memory-limited.
+- const int physical_thread_count = std::min(
+- d.getNumGpuMultiProcessors() * d.maxGpuThreadsPerMultiProcessor(),
+- work_element_count);
+- // Assume the kernel be simple enough that it is okay to use 1024 threads
+- // per workgroup.
+- int thread_per_block = std::min(1024, d.maxGpuThreadsPerBlock());
+- block_count = std::min(DivUp(physical_thread_count, thread_per_block),
+- d.getNumGpuMultiProcessors());
++ hipError_t err = hipOccupancyMaxActiveBlocksPerMultiprocessor(
++ &block_count, func, fixed_block_size, dynamic_shared_memory_size);
++ CHECK_EQ(err, hipSuccess);
+ #endif
++ block_count = std::min(block_count * d.getNumGpuMultiProcessors(),
++ DivUp(work_element_count, fixed_block_size));
+
+ config.virtual_thread_count = work_element_count;
+ config.thread_per_block = fixed_block_size;
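
Note: the second hunk lets the CUDA and ROCm paths share the same block-count
clamp: the occupancy-derived per-multiprocessor count is scaled by the number
of multiprocessors and capped at the number of fixed-size blocks needed to
cover all work elements. A minimal sketch of that arithmetic (function and
parameter names are illustrative, not part of the commit):

    #include <algorithm>

    // Ceiling division, as DivUp is used in gpu_launch_config.h.
    static int DivUp(int a, int b) { return (a + b - 1) / b; }

    // occupancy_blocks_per_mp comes from
    // cuda/hipOccupancyMaxActiveBlocksPerMultiprocessor.
    int ClampBlockCount(int occupancy_blocks_per_mp, int multiprocessor_count,
                        int work_element_count, int fixed_block_size) {
      return std::min(occupancy_blocks_per_mp * multiprocessor_count,
                      DivUp(work_element_count, fixed_block_size));
    }

For example, 4 blocks per multiprocessor on a 36-multiprocessor GPU with
1000000 work elements at block size 256 gives min(144, 3907) = 144 blocks.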