diff options
Diffstat (limited to 'c6769e20bf6096d5828e2590def2b25edb3189d6.patch')
-rw-r--r-- | c6769e20bf6096d5828e2590def2b25edb3189d6.patch | 251 |
1 files changed, 251 insertions, 0 deletions
diff --git a/c6769e20bf6096d5828e2590def2b25edb3189d6.patch b/c6769e20bf6096d5828e2590def2b25edb3189d6.patch new file mode 100644 index 000000000000..73e95908867f --- /dev/null +++ b/c6769e20bf6096d5828e2590def2b25edb3189d6.patch @@ -0,0 +1,251 @@ +From c6769e20bf6096d5828e2590def2b25edb3189d6 Mon Sep 17 00:00:00 2001 +From: Christian Sigg <csigg@google.com> +Date: Mon, 17 Aug 2020 14:12:02 -0700 +Subject: [PATCH] Use CUB from the CUDA Toolkit starting with version 11.0. + +PiperOrigin-RevId: 327096097 +Change-Id: I444ec3ac3348f76728c931a4bb4aa1b7cbe1b673 +--- + tensorflow/core/kernels/BUILD | 8 ++--- + tensorflow/core/kernels/gpu_prim.h | 26 +++++++------- + tensorflow/core/util/BUILD | 2 +- + third_party/cub.BUILD | 1 - + third_party/cub.pr170.patch | 48 ------------------------- + third_party/gpus/cuda/BUILD.tpl | 6 ++++ + third_party/gpus/cuda/BUILD.windows.tpl | 5 +++ + third_party/gpus/cuda_configure.bzl | 7 ++++ + 8 files changed, 36 insertions(+), 67 deletions(-) + delete mode 100644 third_party/cub.pr170.patch + +diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD +index 88958cdaa9878..19dc5c73252a8 100644 +--- a/tensorflow/core/kernels/BUILD ++++ b/tensorflow/core/kernels/BUILD +@@ -490,7 +490,7 @@ cc_library( + name = "gpu_prim_hdrs", + hdrs = ["gpu_prim.h"], + deps = if_cuda([ +- "@cub_archive//:cub", ++ "@local_config_cuda//cuda:cub_headers", + ]) + if_rocm([ + "@local_config_rocm//rocm:rocprim", + ]), +@@ -3896,7 +3896,7 @@ tf_kernel_library( + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ] + if_cuda([ +- "@cub_archive//:cub", ++ "@local_config_cuda//cuda:cub_headers", + "@local_config_cuda//cuda:cudnn_header", + ]) + if_rocm([ + "@local_config_rocm//rocm:rocprim", +@@ -3986,7 +3986,7 @@ tf_kernel_library( + ] + if_cuda_or_rocm([ + ":reduction_ops", + ]) + if_cuda([ +- "@cub_archive//:cub", ++ "@local_config_cuda//cuda:cub_headers", + "//tensorflow/core:stream_executor", + 
"//tensorflow/stream_executor/cuda:cuda_stream", + ]) + if_rocm([ +@@ -4708,7 +4708,7 @@ tf_kernel_library( + ] + if_cuda_or_rocm([ + ":reduction_ops", + ]) + if_cuda([ +- "@cub_archive//:cub", ++ "@local_config_cuda//cuda:cub_headers", + ]) + if_rocm([ + "@local_config_rocm//rocm:rocprim", + ]), +diff --git a/tensorflow/core/kernels/gpu_prim.h b/tensorflow/core/kernels/gpu_prim.h +index 82fcb21e0ac04..33c5df1ae2371 100644 +--- a/tensorflow/core/kernels/gpu_prim.h ++++ b/tensorflow/core/kernels/gpu_prim.h +@@ -15,19 +15,19 @@ limitations under the License. + #define TENSORFLOW_CORE_KERNELS_GPU_PRIM_H_ + + #if GOOGLE_CUDA +-#include "third_party/cub/block/block_load.cuh" +-#include "third_party/cub/block/block_scan.cuh" +-#include "third_party/cub/block/block_store.cuh" +-#include "third_party/cub/device/device_histogram.cuh" +-#include "third_party/cub/device/device_radix_sort.cuh" +-#include "third_party/cub/device/device_reduce.cuh" +-#include "third_party/cub/device/device_segmented_radix_sort.cuh" +-#include "third_party/cub/device/device_segmented_reduce.cuh" +-#include "third_party/cub/device/device_select.cuh" +-#include "third_party/cub/iterator/counting_input_iterator.cuh" +-#include "third_party/cub/iterator/transform_input_iterator.cuh" +-#include "third_party/cub/thread/thread_operators.cuh" +-#include "third_party/cub/warp/warp_reduce.cuh" ++#include "cub/block/block_load.cuh" ++#include "cub/block/block_scan.cuh" ++#include "cub/block/block_store.cuh" ++#include "cub/device/device_histogram.cuh" ++#include "cub/device/device_radix_sort.cuh" ++#include "cub/device/device_reduce.cuh" ++#include "cub/device/device_segmented_radix_sort.cuh" ++#include "cub/device/device_segmented_reduce.cuh" ++#include "cub/device/device_select.cuh" ++#include "cub/iterator/counting_input_iterator.cuh" ++#include "cub/iterator/transform_input_iterator.cuh" ++#include "cub/thread/thread_operators.cuh" ++#include "cub/warp/warp_reduce.cuh" + 
#include "third_party/gpus/cuda/include/cusparse.h" + + namespace gpuprim = ::cub; +diff --git a/tensorflow/core/util/BUILD b/tensorflow/core/util/BUILD +index 4d2ff9a805811..241e382a650ba 100644 +--- a/tensorflow/core/util/BUILD ++++ b/tensorflow/core/util/BUILD +@@ -626,7 +626,7 @@ tf_kernel_library( + "//tensorflow/core:lib", + ] + if_cuda([ + "//tensorflow/stream_executor/cuda:cusparse_lib", +- "@cub_archive//:cub", ++ "@local_config_cuda//cuda:cub_headers", + ]) + if_rocm([ + "@local_config_rocm//rocm:hipsparse", + ]), +diff --git a/third_party/cub.BUILD b/third_party/cub.BUILD +index a04347b21eefb..29159c9dad3d3 100644 +--- a/third_party/cub.BUILD ++++ b/third_party/cub.BUILD +@@ -20,7 +20,6 @@ filegroup( + cc_library( + name = "cub", + hdrs = if_cuda([":cub_header_files"]), +- include_prefix = "third_party", + deps = [ + "@local_config_cuda//cuda:cuda_headers", + ], +diff --git a/third_party/cub.pr170.patch b/third_party/cub.pr170.patch +deleted file mode 100644 +index 5b7432e885867..0000000000000 +--- a/third_party/cub.pr170.patch ++++ /dev/null +@@ -1,48 +0,0 @@ +-From fd6e7a61a16a17fa155cbd717de0c79001af71e6 Mon Sep 17 00:00:00 2001 +-From: Artem Belevich <tra@google.com> +-Date: Mon, 23 Sep 2019 11:18:56 -0700 +-Subject: [PATCH] Fix CUDA version detection in CUB +- +-This fixes the problem with CUB using deprecated shfl/vote instructions when CUB +-is compiled with clang (e.g. some TensorFlow builds). 
+---- +- cub/util_arch.cuh | 3 ++- +- cub/util_type.cuh | 4 ++-- +- 2 files changed, 4 insertions(+), 3 deletions(-) +- +-diff --git a/cub/util_arch.cuh b/cub/util_arch.cuh +-index 87c5ea2fb..9ad9d1cbb 100644 +---- a/cub/util_arch.cuh +-+++ b/cub/util_arch.cuh +-@@ -44,7 +44,8 @@ namespace cub { +- +- #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document +- +--#if (__CUDACC_VER_MAJOR__ >= 9) && !defined(CUB_USE_COOPERATIVE_GROUPS) +-+#if !defined(CUB_USE_COOPERATIVE_GROUPS) && \ +-+ (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000) +- #define CUB_USE_COOPERATIVE_GROUPS +- #endif +- +-diff --git a/cub/util_type.cuh b/cub/util_type.cuh +-index 0ba41e1ed..b2433d735 100644 +---- a/cub/util_type.cuh +-+++ b/cub/util_type.cuh +-@@ -37,7 +37,7 @@ +- #include <limits> +- #include <cfloat> +- +--#if (__CUDACC_VER_MAJOR__ >= 9) +-+#if (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000) +- #include <cuda_fp16.h> +- #endif +- +-@@ -1063,7 +1063,7 @@ struct FpLimits<double> +- }; +- +- +--#if (__CUDACC_VER_MAJOR__ >= 9) +-+#if (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000) +- template <> +- struct FpLimits<__half> +- { +diff --git a/third_party/gpus/cuda/BUILD.tpl b/third_party/gpus/cuda/BUILD.tpl +index e5833e7cdbbc2..a4a21abc36769 100644 +--- a/third_party/gpus/cuda/BUILD.tpl ++++ b/third_party/gpus/cuda/BUILD.tpl +@@ -176,6 +176,11 @@ cc_library( + ], + ) + ++alias( ++ name = "cub_headers", ++ actual = "%{cub_actual}" ++) ++ + cuda_header_library( + name = "cupti_headers", + hdrs = [":cuda-extras"], +@@ -224,3 +229,4 @@ py_library( + ) + + %{copy_rules} ++ +diff --git a/third_party/gpus/cuda/BUILD.windows.tpl b/third_party/gpus/cuda/BUILD.windows.tpl +index 55a9ec3d1ab10..cabfac28fc357 100644 +--- a/third_party/gpus/cuda/BUILD.windows.tpl ++++ b/third_party/gpus/cuda/BUILD.windows.tpl +@@ -171,6 +171,11 @@ cc_library( + ], + ) + ++alias( ++ name = "cub_headers", ++ actual = "%{cub_actual}" ++) ++ + cuda_header_library( + name = "cupti_headers", + hdrs = 
[":cuda-extras"], +diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl +index 70bb91159de1a..ea33963fe19fb 100644 +--- a/third_party/gpus/cuda_configure.bzl ++++ b/third_party/gpus/cuda_configure.bzl +@@ -692,6 +692,7 @@ def _get_cuda_config(repository_ctx, find_cuda_config_script): + return struct( + cuda_toolkit_path = toolkit_path, + cuda_version = cuda_version, ++ cuda_version_major = cuda_major, + cublas_version = cublas_version, + cusolver_version = cusolver_version, + curand_version = curand_version, +@@ -776,6 +777,7 @@ def _create_dummy_repository(repository_ctx): + "%{curand_lib}": lib_name("curand", cpu_value), + "%{cupti_lib}": lib_name("cupti", cpu_value), + "%{cusparse_lib}": lib_name("cusparse", cpu_value), ++ "%{cub_actual}": ":cuda_headers", + "%{copy_rules}": """ + filegroup(name="cuda-include") + filegroup(name="cublas-include") +@@ -1122,6 +1124,10 @@ def _create_local_cuda_repository(repository_ctx): + }, + ) + ++ cub_actual = "@cub_archive//:cub" ++ if int(cuda_config.cuda_version_major) >= 11: ++ cub_actual = ":cuda_headers" ++ + repository_ctx.template( + "cuda/BUILD", + tpl_paths["cuda:BUILD"], +@@ -1137,6 +1143,7 @@ def _create_local_cuda_repository(repository_ctx): + "%{curand_lib}": _basename(repository_ctx, cuda_libs["curand"]), + "%{cupti_lib}": _basename(repository_ctx, cuda_libs["cupti"]), + "%{cusparse_lib}": _basename(repository_ctx, cuda_libs["cusparse"]), ++ "%{cub_actual}": cub_actual, + "%{copy_rules}": "\n".join(copy_rules), + }, + ) |