author      acxz    2020-08-13 15:00:11 -0400
committer   acxz    2020-08-13 15:00:11 -0400
commit      f4e53a4adb9babe64752698a0829b1de92878ca4 (patch)
tree        7be58115343bd64c5b9df84bd19f4793475081b6
parent      8810f407652894d1a8d49dc138ddc00bd1659a0e (diff)
download    aur-f4e53a4adb9babe64752698a0829b1de92878ca4.tar.gz
add patch to fix undefined hipOccupancyMaxPotentialBlockSize
-rw-r--r--   .SRCINFO                    4
-rw-r--r--   PKGBUILD                   18
-rw-r--r--   fix_occupancy_block.patch  87

3 files changed, 102 insertions, 7 deletions
diff --git a/.SRCINFO b/.SRCINFO
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,7 +1,7 @@
 pkgbase = tensorflow-rocm
 	pkgdesc = Library for computation using data flow graphs for scalable machine learning
 	pkgver = 2.3.0
-	pkgrel = 6
+	pkgrel = 7
 	url = https://www.tensorflow.org/
 	arch = x86_64
 	license = APACHE
@@ -30,6 +30,7 @@ pkgbase = tensorflow-rocm
 	source = fix_hipcc_path.patch::https://patch-diff.githubusercontent.com/raw/tensorflow/tensorflow/pull/42292.patch
 	source = fix_gpu_atomic_redef.patch::https://github.com/tensorflow/tensorflow/commit/c054f40f66fa625f51085a20c48554c61d05c5fd.patch
 	source = fix_ldexp_float.patch::https://github.com/tensorflow/tensorflow/commit/655ce09f679a90ecd561538227c703b42d0fc5fa.patch
+	source = fix_occupancy_block.patch
 	sha512sums = 86aa087ea84dac1ecc1023b23a378100d41cc6778ccd20404a4b955fc67cef11b3dc08abcc5b88020124d221e6fb172b33bd5206e9c9db6bc8fbeed399917eac
 	sha512sums = df2e0373e2f63b8766f31933f7db57f6a7559b8f03af1db51644fba87731451a7cd3895529a3192e5394612fcb42f245b794b1c9ca3c05881ca03a547c8c9acc
 	sha512sums = e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08
@@ -37,6 +38,7 @@ pkgbase = tensorflow-rocm
 	sha512sums = SKIP
 	sha512sums = SKIP
 	sha512sums = SKIP
+	sha512sums = 88c04ed7a766193687d7079102332e3c63d6f0accbda777836abe5e03e9ebb83fd1aeaa9e4adca70310ce18bf3c6c3907f1f8a11c13e67e3ef79497b91bbf126

 pkgname = tensorflow-rocm
 	pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM)

diff --git a/PKGBUILD b/PKGBUILD
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -8,7 +8,7 @@ pkgbase=tensorflow-rocm
 pkgname=(tensorflow-rocm tensorflow-opt-rocm python-tensorflow-rocm python-tensorflow-opt-rocm)
 pkgver=2.3.0
 _pkgver=2.3.0
-pkgrel=6
+pkgrel=7
 pkgdesc="Library for computation using data flow graphs for scalable machine learning"
 url="https://www.tensorflow.org/"
 license=('APACHE')
@@ -21,10 +21,11 @@ optdepends=('tensorboard: Tensorflow visualization toolkit')
 source=("$pkgname-$pkgver.tar.gz::https://github.com/tensorflow/tensorflow/archive/v${_pkgver}.tar.gz"
         numpy1.20.patch::https://github.com/tensorflow/tensorflow/commit/75ea0b31477d6ba9e990e296bbbd8ca4e7eebadf.patch
         build-against-actual-mkl.patch
-        "fix_hip_hcc_path.patch"::"https://github.com/tensorflow/tensorflow/commit/6175b78d8386bd6e5b2beebedb9f40e6b887d5a9.patch"
-        "fix_hipcc_path.patch"::"https://patch-diff.githubusercontent.com/raw/tensorflow/tensorflow/pull/42292.patch"
-        "fix_gpu_atomic_redef.patch"::"https://github.com/tensorflow/tensorflow/commit/c054f40f66fa625f51085a20c48554c61d05c5fd.patch"
-        "fix_ldexp_float.patch"::"https://github.com/tensorflow/tensorflow/commit/655ce09f679a90ecd561538227c703b42d0fc5fa.patch")
+        fix_hip_hcc_path.patch::https://github.com/tensorflow/tensorflow/commit/6175b78d8386bd6e5b2beebedb9f40e6b887d5a9.patch
+        fix_hipcc_path.patch::https://patch-diff.githubusercontent.com/raw/tensorflow/tensorflow/pull/42292.patch
+        fix_gpu_atomic_redef.patch::https://github.com/tensorflow/tensorflow/commit/c054f40f66fa625f51085a20c48554c61d05c5fd.patch
+        fix_ldexp_float.patch::https://github.com/tensorflow/tensorflow/commit/655ce09f679a90ecd561538227c703b42d0fc5fa.patch
+        fix_occupancy_block.patch)
 sha512sums=('86aa087ea84dac1ecc1023b23a378100d41cc6778ccd20404a4b955fc67cef11b3dc08abcc5b88020124d221e6fb172b33bd5206e9c9db6bc8fbeed399917eac'
             'df2e0373e2f63b8766f31933f7db57f6a7559b8f03af1db51644fba87731451a7cd3895529a3192e5394612fcb42f245b794b1c9ca3c05881ca03a547c8c9acc'
             'e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08'
@@ -32,7 +33,8 @@ sha512sums=('86aa087ea84dac1ecc1023b23a378100d41cc6778ccd20404a4b955fc67cef11b3d
             'SKIP'
             'SKIP'
             'SKIP'
-            'SKIP')
+            'SKIP'
+            '88c04ed7a766193687d7079102332e3c63d6f0accbda777836abe5e03e9ebb83fd1aeaa9e4adca70310ce18bf3c6c3907f1f8a11c13e67e3ef79497b91bbf126')

 get_pyver () {
   python -c 'import sys; print(str(sys.version_info[0]) + "." + str(sys.version_info[1]))'
@@ -73,6 +75,10 @@ prepare() {
   # Fix ldexp float method
   patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_ldexp_float.patch

+  # Fix missing hipOccupancyMaxPotentialBlockSize method
+  # https://github.com/tensorflow/tensorflow/commit/22def20bae7be6d5b790b360abed5919385b16c2
+  patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_occupancy_block.patch
+
   cp -r tensorflow-${_pkgver} tensorflow-${_pkgver}-rocm
   cp -r tensorflow-${_pkgver} tensorflow-${_pkgver}-opt-rocm

diff --git a/fix_occupancy_block.patch b/fix_occupancy_block.patch
new file mode 100644
index 000000000000..137b4e56ea55
--- /dev/null
+++ b/fix_occupancy_block.patch
@@ -0,0 +1,87 @@
+From 22def20bae7be6d5b790b360abed5919385b16c2 Mon Sep 17 00:00:00 2001
+From: Christian Sigg <csigg@google.com>
+Date: Mon, 29 Jun 2020 04:23:28 -0700
+Subject: [PATCH] New ROCm 3.5 RBE docker based on Ubuntu 18.04, re-enable RBE.
+
+Fix list of cxx_builtin_include_directories. Only a few are needed, but those are more complicated (mix of symlinked and real paths).
+
+Properly return error from crosstool wrapper.
+
+PiperOrigin-RevId: 318788040
+Change-Id: Ia66898e98a9a4d8fb479c7e75317f4114f6081e5
+---
+ .bazelrc                                      | 17 ++++
+ tensorflow/core/util/gpu_launch_config.h      | 40 ++------
+ ....local-toolchain-ubuntu18.04-manylinux2010 | 34 ++++++++
+ .../ci_build/Dockerfile.rbe.rocm-ubuntu16.04  | 37 ---------
+ ...rocm-ubuntu18.04-manylinux2010-multipython | 79 ++++++++++++++
+ .../bin/crosstool_wrapper_driver_rocm.tpl     | 19 ++++-
+ third_party/gpus/rocm_configure.bzl           | 83 +++----------
+ .../preconfig/generate/containers.bzl         |  2 +-
+ .../toolchains/remote_config/configs.bzl      | 12 +--
+ .../toolchains/remote_config/containers.bzl   | 10 ++-
+ 10 files changed, 184 insertions(+), 149 deletions(-)
+ create mode 100644 tensorflow/tools/ci_build/Dockerfile.local-toolchain-ubuntu18.04-manylinux2010
+ delete mode 100644 tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu16.04
+ create mode 100644 tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu18.04-manylinux2010-multipython
+
+diff --git a/tensorflow/core/util/gpu_launch_config.h b/tensorflow/core/util/gpu_launch_config.h
+index 4dfaf333d4bf0..0b943e917da01 100644
+--- a/tensorflow/core/util/gpu_launch_config.h
++++ b/tensorflow/core/util/gpu_launch_config.h
+@@ -168,18 +168,10 @@ GpuLaunchConfig GetGpuLaunchConfig(int work_element_count,
+                                    block_size_limit);
+   CHECK_EQ(err, cudaSuccess);
+ #elif TENSORFLOW_USE_ROCM
+-  // Earlier versions of this HIP routine incorrectly returned void.
+-  // TODO re-enable hipError_t error checking when HIP is fixed.
+-  // ROCm interface uses unsigned int, convert after checking
+-  uint32_t block_count_uint = 0;
+-  uint32_t thread_per_block_uint = 0;
+-  CHECK_GE(block_size_limit, 0);
+-  uint32_t block_size_limit_uint = static_cast<uint32_t>(block_size_limit);
+-  hipOccupancyMaxPotentialBlockSize(&block_count_uint, &thread_per_block_uint,
+-                                    func, dynamic_shared_memory_size,
+-                                    block_size_limit_uint);
+-  block_count = static_cast<int>(block_count_uint);
+-  thread_per_block = static_cast<int>(thread_per_block_uint);
++  hipError_t err = hipOccupancyMaxPotentialBlockSize(
++      &block_count, &thread_per_block, func, dynamic_shared_memory_size,
++      block_size_limit);
++  CHECK_EQ(err, hipSuccess);
+ #endif
+
+   block_count =
+@@ -208,27 +200,13 @@ GpuLaunchConfig GetGpuLaunchConfigFixedBlockSize(
+   cudaError_t err = cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+       &block_count, func, fixed_block_size, dynamic_shared_memory_size);
+   CHECK_EQ(err, cudaSuccess);
+-  block_count = std::min(block_count * d.getNumGpuMultiProcessors(),
+-                         DivUp(work_element_count, fixed_block_size));
+ #elif TENSORFLOW_USE_ROCM
+-  // ROCM TODO re-enable this after hipOccupancyMaxActiveBlocksPerMultiprocessor
+-  // is implemented
+-  // hipError_t err = hipOccupancyMaxActiveBlocksPerMultiprocessor(
+-  //    &block_count, &thread_per_block, func, dynamic_shared_memory_size,
+-  //    block_size_limit);
+-  // CHECK_EQ(err, hipSuccess);
+-
+-  // Apply the heuristic in GetGpuLaunchConfig(int, const Eigen::GpuDevice&)
+-  // that the kernel is quite simple and will largely be memory-limited.
+-  const int physical_thread_count = std::min(
+-      d.getNumGpuMultiProcessors() * d.maxGpuThreadsPerMultiProcessor(),
+-      work_element_count);
+-  // Assume the kernel be simple enough that it is okay to use 1024 threads
+-  // per workgroup.
+-  int thread_per_block = std::min(1024, d.maxGpuThreadsPerBlock());
+-  block_count = std::min(DivUp(physical_thread_count, thread_per_block),
+-                         d.getNumGpuMultiProcessors());
++  hipError_t err = hipOccupancyMaxActiveBlocksPerMultiprocessor(
++      &block_count, func, fixed_block_size, dynamic_shared_memory_size);
++  CHECK_EQ(err, hipSuccess);
+ #endif
++  block_count = std::min(block_count * d.getNumGpuMultiProcessors(),
++                         DivUp(work_element_count, fixed_block_size));
+
+   config.virtual_thread_count = work_element_count;
+   config.thread_per_block = fixed_block_size;
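For context, the sketch below is not part of the commit; it only illustrates the two HIP occupancy queries that the patched gpu_launch_config.h calls directly. Both entry points take int* out-parameters and return hipError_t, mirroring their CUDA counterparts, which is what lets the upstream fix drop the old uint32_t conversion shim and the hand-rolled block-count heuristic. The kernel, the file name, and the hipcc build line are illustrative assumptions, not taken from the commit.

// occupancy_demo.cpp -- illustrative sketch only; the kernel and the build
// line are assumptions, not part of this commit.
// Build on a ROCm system (assumption): hipcc occupancy_demo.cpp -o occupancy_demo
#include <hip/hip_runtime.h>
#include <cstdio>

// Trivial stand-in for a TensorFlow GPU kernel.
__global__ void AxpyKernel(int n, float a, const float* x, float* y) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) y[i] = a * x[i] + y[i];
}

int main() {
  // GetGpuLaunchConfig path: ask HIP for the block size that maximizes
  // occupancy. Note the int* outputs and hipError_t return, the signatures
  // the patched TensorFlow code checks with CHECK_EQ(err, hipSuccess).
  int min_grid_size = 0;
  int block_size = 0;
  hipError_t err = hipOccupancyMaxPotentialBlockSize(
      &min_grid_size, &block_size, AxpyKernel,
      /*dynSharedMemPerBlk=*/0, /*blockSizeLimit=*/0);
  if (err != hipSuccess) {
    fprintf(stderr, "hipOccupancyMaxPotentialBlockSize: %s\n",
            hipGetErrorString(err));
    return 1;
  }
  printf("suggested block size %d, minimum grid size %d\n",
         block_size, min_grid_size);

  // GetGpuLaunchConfigFixedBlockSize path: for a fixed block size, ask how
  // many blocks fit per multiprocessor; the patched code then scales this by
  // the multiprocessor count and clamps it by the amount of work.
  int blocks_per_cu = 0;
  err = hipOccupancyMaxActiveBlocksPerMultiprocessor(
      &blocks_per_cu, AxpyKernel, /*blockSize=*/256,
      /*dynSharedMemPerBlk=*/0);
  if (err != hipSuccess) {
    fprintf(stderr, "hipOccupancyMaxActiveBlocksPerMultiprocessor: %s\n",
            hipGetErrorString(err));
    return 1;
  }
  printf("max active blocks per CU at 256 threads: %d\n", blocks_per_cu);
  return 0;
}

The PKGBUILD backports upstream commit 22def20bae7be6d5b790b360abed5919385b16c2 so that the TensorFlow 2.3.0 sources build against these signatures rather than the older HIP interface that, per the comment removed in the patch, incorrectly returned void.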