author      acxz    2020-08-13 15:00:11 -0400
committer   acxz    2020-08-13 15:00:11 -0400
commit      f4e53a4adb9babe64752698a0829b1de92878ca4 (patch)
tree        7be58115343bd64c5b9df84bd19f4793475081b6
parent      8810f407652894d1a8d49dc138ddc00bd1659a0e (diff)
download    aur-f4e53a4adb9babe64752698a0829b1de92878ca4.tar.gz
add patch to fix undefined hipOccupancyMaxPotentialBlockSize
-rw-r--r--   .SRCINFO                    4
-rw-r--r--   PKGBUILD                   18
-rw-r--r--   fix_occupancy_block.patch  87

3 files changed, 102 insertions, 7 deletions
diff --git a/.SRCINFO b/.SRCINFO
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,7 +1,7 @@
 pkgbase = tensorflow-rocm
 	pkgdesc = Library for computation using data flow graphs for scalable machine learning
 	pkgver = 2.3.0
-	pkgrel = 6
+	pkgrel = 7
 	url = https://www.tensorflow.org/
 	arch = x86_64
 	license = APACHE
@@ -30,6 +30,7 @@ pkgbase = tensorflow-rocm
 	source = fix_hipcc_path.patch::https://patch-diff.githubusercontent.com/raw/tensorflow/tensorflow/pull/42292.patch
 	source = fix_gpu_atomic_redef.patch::https://github.com/tensorflow/tensorflow/commit/c054f40f66fa625f51085a20c48554c61d05c5fd.patch
 	source = fix_ldexp_float.patch::https://github.com/tensorflow/tensorflow/commit/655ce09f679a90ecd561538227c703b42d0fc5fa.patch
+	source = fix_occupancy_block.patch
 	sha512sums = 86aa087ea84dac1ecc1023b23a378100d41cc6778ccd20404a4b955fc67cef11b3dc08abcc5b88020124d221e6fb172b33bd5206e9c9db6bc8fbeed399917eac
 	sha512sums = df2e0373e2f63b8766f31933f7db57f6a7559b8f03af1db51644fba87731451a7cd3895529a3192e5394612fcb42f245b794b1c9ca3c05881ca03a547c8c9acc
 	sha512sums = e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08
@@ -37,6 +38,7 @@ pkgbase = tensorflow-rocm
 	sha512sums = SKIP
 	sha512sums = SKIP
 	sha512sums = SKIP
+	sha512sums = 88c04ed7a766193687d7079102332e3c63d6f0accbda777836abe5e03e9ebb83fd1aeaa9e4adca70310ce18bf3c6c3907f1f8a11c13e67e3ef79497b91bbf126

 pkgname = tensorflow-rocm
 	pkgdesc = Library for computation using data flow graphs for scalable machine learning (with ROCM)

diff --git a/PKGBUILD b/PKGBUILD
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -8,7 +8,7 @@ pkgbase=tensorflow-rocm
 pkgname=(tensorflow-rocm tensorflow-opt-rocm python-tensorflow-rocm python-tensorflow-opt-rocm)
 pkgver=2.3.0
 _pkgver=2.3.0
-pkgrel=6
+pkgrel=7
 pkgdesc="Library for computation using data flow graphs for scalable machine learning"
 url="https://www.tensorflow.org/"
 license=('APACHE')
@@ -21,10 +21,11 @@ optdepends=('tensorboard: Tensorflow visualization toolkit')
 source=("$pkgname-$pkgver.tar.gz::https://github.com/tensorflow/tensorflow/archive/v${_pkgver}.tar.gz"
         numpy1.20.patch::https://github.com/tensorflow/tensorflow/commit/75ea0b31477d6ba9e990e296bbbd8ca4e7eebadf.patch
         build-against-actual-mkl.patch
-        "fix_hip_hcc_path.patch"::"https://github.com/tensorflow/tensorflow/commit/6175b78d8386bd6e5b2beebedb9f40e6b887d5a9.patch"
-        "fix_hipcc_path.patch"::"https://patch-diff.githubusercontent.com/raw/tensorflow/tensorflow/pull/42292.patch"
-        "fix_gpu_atomic_redef.patch"::"https://github.com/tensorflow/tensorflow/commit/c054f40f66fa625f51085a20c48554c61d05c5fd.patch"
-        "fix_ldexp_float.patch"::"https://github.com/tensorflow/tensorflow/commit/655ce09f679a90ecd561538227c703b42d0fc5fa.patch")
+        fix_hip_hcc_path.patch::https://github.com/tensorflow/tensorflow/commit/6175b78d8386bd6e5b2beebedb9f40e6b887d5a9.patch
+        fix_hipcc_path.patch::https://patch-diff.githubusercontent.com/raw/tensorflow/tensorflow/pull/42292.patch
+        fix_gpu_atomic_redef.patch::https://github.com/tensorflow/tensorflow/commit/c054f40f66fa625f51085a20c48554c61d05c5fd.patch
+        fix_ldexp_float.patch::https://github.com/tensorflow/tensorflow/commit/655ce09f679a90ecd561538227c703b42d0fc5fa.patch
+        fix_occupancy_block.patch)
 sha512sums=('86aa087ea84dac1ecc1023b23a378100d41cc6778ccd20404a4b955fc67cef11b3dc08abcc5b88020124d221e6fb172b33bd5206e9c9db6bc8fbeed399917eac'
             'df2e0373e2f63b8766f31933f7db57f6a7559b8f03af1db51644fba87731451a7cd3895529a3192e5394612fcb42f245b794b1c9ca3c05881ca03a547c8c9acc'
             'e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08'
@@ -32,7 +33,8 @@ sha512sums=('86aa087ea84dac1ecc1023b23a378100d41cc6778ccd20404a4b955fc67cef11b3d
             'SKIP'
             'SKIP'
             'SKIP'
-            'SKIP')
+            'SKIP'
+            '88c04ed7a766193687d7079102332e3c63d6f0accbda777836abe5e03e9ebb83fd1aeaa9e4adca70310ce18bf3c6c3907f1f8a11c13e67e3ef79497b91bbf126')

 get_pyver () {
   python -c 'import sys; print(str(sys.version_info[0]) + "." + str(sys.version_info[1]))'
@@ -73,6 +75,10 @@ prepare() {
   # Fix ldexp float method
   patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_ldexp_float.patch

+  # Fix missing hipOccupancyMaxPotentialBlockSize method
+  # https://github.com/tensorflow/tensorflow/commit/22def20bae7be6d5b790b360abed5919385b16c2
+  patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix_occupancy_block.patch
+
   cp -r tensorflow-${_pkgver} tensorflow-${_pkgver}-rocm
   cp -r tensorflow-${_pkgver} tensorflow-${_pkgver}-opt-rocm

diff --git a/fix_occupancy_block.patch b/fix_occupancy_block.patch
new file mode 100644
index 000000000000..137b4e56ea55
--- /dev/null
+++ b/fix_occupancy_block.patch
@@ -0,0 +1,87 @@
+From 22def20bae7be6d5b790b360abed5919385b16c2 Mon Sep 17 00:00:00 2001
+From: Christian Sigg <csigg@google.com>
+Date: Mon, 29 Jun 2020 04:23:28 -0700
+Subject: [PATCH] New ROCm 3.5 RBE docker based on Ubuntu 18.04, re-enable RBE.
+
+Fix list of cxx_builtin_include_directories. Only a few are needed, but those are more complicated (mix of symlinked and real paths).
+
+Properly return error from crosstool wrapper.
+
+PiperOrigin-RevId: 318788040
+Change-Id: Ia66898e98a9a4d8fb479c7e75317f4114f6081e5
+---
+ .bazelrc                                      | 17 ++++
+ tensorflow/core/util/gpu_launch_config.h      | 40 ++------
+ ....local-toolchain-ubuntu18.04-manylinux2010 | 34 ++++++++
+ .../ci_build/Dockerfile.rbe.rocm-ubuntu16.04  | 37 ---------
+ ...rocm-ubuntu18.04-manylinux2010-multipython | 79 ++++++++++++++
+ .../bin/crosstool_wrapper_driver_rocm.tpl     | 19 ++++-
+ third_party/gpus/rocm_configure.bzl           | 83 +++----------
+ .../preconfig/generate/containers.bzl         |  2 +-
+ .../toolchains/remote_config/configs.bzl      | 12 +--
+ .../toolchains/remote_config/containers.bzl   | 10 ++-
+ 10 files changed, 184 insertions(+), 149 deletions(-)
+ create mode 100644 tensorflow/tools/ci_build/Dockerfile.local-toolchain-ubuntu18.04-manylinux2010
+ delete mode 100644 tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu16.04
+ create mode 100644 tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu18.04-manylinux2010-multipython
+
+diff --git a/tensorflow/core/util/gpu_launch_config.h b/tensorflow/core/util/gpu_launch_config.h
+index 4dfaf333d4bf0..0b943e917da01 100644
+--- a/tensorflow/core/util/gpu_launch_config.h
++++ b/tensorflow/core/util/gpu_launch_config.h
+@@ -168,18 +168,10 @@ GpuLaunchConfig GetGpuLaunchConfig(int work_element_count,
+                                    block_size_limit);
+   CHECK_EQ(err, cudaSuccess);
+ #elif TENSORFLOW_USE_ROCM
+-  // Earlier versions of this HIP routine incorrectly returned void.
+-  // TODO re-enable hipError_t error checking when HIP is fixed.
+-  // ROCm interface uses unsigned int, convert after checking
+-  uint32_t block_count_uint = 0;
+-  uint32_t thread_per_block_uint = 0;
+-  CHECK_GE(block_size_limit, 0);
+-  uint32_t block_size_limit_uint = static_cast<uint32_t>(block_size_limit);
+-  hipOccupancyMaxPotentialBlockSize(&block_count_uint, &thread_per_block_uint,
+-                                    func, dynamic_shared_memory_size,
+-                                    block_size_limit_uint);
+-  block_count = static_cast<int>(block_count_uint);
+-  thread_per_block = static_cast<int>(thread_per_block_uint);
++  hipError_t err = hipOccupancyMaxPotentialBlockSize(
++      &block_count, &thread_per_block, func, dynamic_shared_memory_size,
++      block_size_limit);
++  CHECK_EQ(err, hipSuccess);
+ #endif
+
+   block_count =
+@@ -208,27 +200,13 @@ GpuLaunchConfig GetGpuLaunchConfigFixedBlockSize(
+   cudaError_t err = cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+       &block_count, func, fixed_block_size, dynamic_shared_memory_size);
+   CHECK_EQ(err, cudaSuccess);
+-  block_count = std::min(block_count * d.getNumGpuMultiProcessors(),
+-                         DivUp(work_element_count, fixed_block_size));
+ #elif TENSORFLOW_USE_ROCM
+-  // ROCM TODO re-enable this after hipOccupancyMaxActiveBlocksPerMultiprocessor
+-  // is implemented
+-  // hipError_t err = hipOccupancyMaxActiveBlocksPerMultiprocessor(
+-  //    &block_count, &thread_per_block, func, dynamic_shared_memory_size,
+-  //    block_size_limit);
+-  // CHECK_EQ(err, hipSuccess);
+-
+-  // Apply the heuristic in GetGpuLaunchConfig(int, const Eigen::GpuDevice&)
+-  // that the kernel is quite simple and will largely be memory-limited.
+-  const int physical_thread_count = std::min(
+-      d.getNumGpuMultiProcessors() * d.maxGpuThreadsPerMultiProcessor(),
+-      work_element_count);
+-  // Assume the kernel be simple enough that it is okay to use 1024 threads
+-  // per workgroup.
+-  int thread_per_block = std::min(1024, d.maxGpuThreadsPerBlock());
+-  block_count = std::min(DivUp(physical_thread_count, thread_per_block),
+-                         d.getNumGpuMultiProcessors());
++  hipError_t err = hipOccupancyMaxActiveBlocksPerMultiprocessor(
++      &block_count, func, fixed_block_size, dynamic_shared_memory_size);
++  CHECK_EQ(err, hipSuccess);
+ #endif
++  block_count = std::min(block_count * d.getNumGpuMultiProcessors(),
++                         DivUp(work_element_count, fixed_block_size));
+
+   config.virtual_thread_count = work_element_count;
+   config.thread_per_block = fixed_block_size;
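For context, the sketch below is not part of the commit; it only illustrates the two HIP occupancy queries that the patched gpu_launch_config.h calls directly. Both entry points take int* out-parameters and return hipError_t, mirroring their CUDA counterparts, which is what lets the upstream fix drop the old uint32_t conversion shim and the hand-rolled block-count heuristic. The kernel, the file name, and the hipcc build line are illustrative assumptions, not taken from the commit.

// occupancy_demo.cpp -- illustrative sketch only; the kernel and the build
// line are assumptions, not part of this commit.
// Build on a ROCm system (assumption): hipcc occupancy_demo.cpp -o occupancy_demo
#include <hip/hip_runtime.h>
#include <cstdio>

// Trivial stand-in for a TensorFlow GPU kernel.
__global__ void AxpyKernel(int n, float a, const float* x, float* y) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) y[i] = a * x[i] + y[i];
}

int main() {
  // GetGpuLaunchConfig path: ask HIP for the block size that maximizes
  // occupancy. Note the int* outputs and hipError_t return, the signatures
  // the patched TensorFlow code checks with CHECK_EQ(err, hipSuccess).
  int min_grid_size = 0;
  int block_size = 0;
  hipError_t err = hipOccupancyMaxPotentialBlockSize(
      &min_grid_size, &block_size, AxpyKernel,
      /*dynSharedMemPerBlk=*/0, /*blockSizeLimit=*/0);
  if (err != hipSuccess) {
    fprintf(stderr, "hipOccupancyMaxPotentialBlockSize: %s\n",
            hipGetErrorString(err));
    return 1;
  }
  printf("suggested block size %d, minimum grid size %d\n",
         block_size, min_grid_size);

  // GetGpuLaunchConfigFixedBlockSize path: for a fixed block size, ask how
  // many blocks fit per multiprocessor; the patched code then scales this by
  // the multiprocessor count and clamps it by the amount of work.
  int blocks_per_cu = 0;
  err = hipOccupancyMaxActiveBlocksPerMultiprocessor(
      &blocks_per_cu, AxpyKernel, /*blockSize=*/256,
      /*dynSharedMemPerBlk=*/0);
  if (err != hipSuccess) {
    fprintf(stderr, "hipOccupancyMaxActiveBlocksPerMultiprocessor: %s\n",
            hipGetErrorString(err));
    return 1;
  }
  printf("max active blocks per CU at 256 threads: %d\n", blocks_per_cu);
  return 0;
}

The PKGBUILD backports upstream commit 22def20bae7be6d5b790b360abed5919385b16c2 so that the TensorFlow 2.3.0 sources build against these signatures rather than the older HIP interface that, per the comment removed in the patch, incorrectly returned void.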