diff options
author | Antoine Viallon | 2020-11-17 18:22:30 +0100 |
---|---|---|
committer | Antoine Viallon | 2020-11-17 18:22:30 +0100 |
commit | 409239779029b5fab891b4d4259d65b4bc5cd1c2 (patch) | |
tree | a6351a21a864ba8981cdbafdcacd5235a1c2998b | |
download | aur-409239779029b5fab891b4d4259d65b4bc5cd1c2.tar.gz |
Initial commit
-rw-r--r-- | .SRCINFO | 137 | ||||
-rw-r--r-- | PKGBUILD | 258 | ||||
-rw-r--r-- | build-against-actual-mkl.patch | 37 | ||||
-rw-r--r-- | c6769e20bf6096d5828e2590def2b25edb3189d6.patch | 251 | ||||
-rw-r--r-- | cuda11.1.patch | 136 | ||||
-rw-r--r-- | fix-h5py3.0.patch | 18 | ||||
-rw-r--r-- | numpy1.20.patch | 45 |
7 files changed, 882 insertions, 0 deletions
diff --git a/.SRCINFO b/.SRCINFO new file mode 100644 index 000000000000..aa97cd75998d --- /dev/null +++ b/.SRCINFO @@ -0,0 +1,137 @@ +pkgbase = tensorflow-opt-native + pkgdesc = Library for computation using data flow graphs for scalable machine learning (with CFLAGS) + pkgver = 2.3.1 + pkgrel = 5 + url = https://www.tensorflow.org/ + arch = x86_64 + license = APACHE + makedepends = bazel + makedepends = python-numpy + makedepends = cuda + makedepends = nvidia-utils + makedepends = nccl + makedepends = git + makedepends = gcc9 + makedepends = cudnn + makedepends = python-pip + makedepends = python-wheel + makedepends = python-setuptools + makedepends = python-h5py + makedepends = python-keras-applications + makedepends = python-keras-preprocessing + makedepends = cython + depends = c-ares + depends = intel-mkl + depends = onednn + depends = pybind11 + depends = openssl-1.0 + depends = lmdb + depends = libpng + depends = curl + depends = giflib + depends = icu + depends = libjpeg-turbo + optdepends = tensorboard: Tensorflow visualization toolkit + source = tensorflow-opt-native-2.3.1.tar.gz::https://github.com/tensorflow/tensorflow/archive/v2.3.1.tar.gz + source = https://github.com/tensorflow/tensorflow/commit/c6769e20bf6096d5828e2590def2b25edb3189d6.patch + source = numpy1.20.patch::https://github.com/tensorflow/tensorflow/commit/75ea0b31477d6ba9e990e296bbbd8ca4e7eebadf.patch + source = cuda11.1.patch + source = fix-h5py3.0.patch + source = build-against-actual-mkl.patch + sha512sums = e497ef4564f50abf9f918be4522cf702f4cf945cb1ebf83af1386ac4ddc7373b3ba70c7f803f8ca06faf2c6b5396e60b1e0e9b97bfbd667e733b08b6e6d70ef0 + sha512sums = 9dcaef0dd4fdd7008a27e383ef87c97990ba883a3094f214f821a039994933ec6ec47f5a832570e5c4b783e0493ce2236e7957e596395c4dee40f9bf2621ff2f + sha512sums = df2e0373e2f63b8766f31933f7db57f6a7559b8f03af1db51644fba87731451a7cd3895529a3192e5394612fcb42f245b794b1c9ca3c05881ca03a547c8c9acc + sha512sums = 0caa5170c22fdea2c708ecdb2c980d668464d9dba9c9730a9ec5e9258572576d783fa1d19da04d5f9d2b06aed36d30971526f212fc64c53d09c3e821bd1a3b5d + sha512sums = 9d7b71fed280ffaf4dfcd4889aa9ab5471874c153259f3e77ed6e6efa745e5c5aa8507d3d1f71dead5b6f4bea5f8b1c10c543929f37a6580c3f4a7cbec338a6a + sha512sums = e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08 + +pkgname = tensorflow-opt-native + pkgdesc = Library for computation using data flow graphs for scalable machine learning (with CPU optimizations) + provides = tensorflow + conflicts = tensorflow + +pkgname = tensorflow-opt-native-cuda + pkgdesc = Library for computation using data flow graphs for scalable machine learning (with CUDA and CPU optimizations) + depends = c-ares + depends = intel-mkl + depends = onednn + depends = pybind11 + depends = openssl-1.0 + depends = lmdb + depends = libpng + depends = curl + depends = giflib + depends = icu + depends = libjpeg-turbo + depends = cuda + depends = cudnn + depends = nccl + provides = tensorflow + provides = tensorflow-cuda + conflicts = tensorflow + +pkgname = python-tensorflow-opt-native + pkgdesc = Library for computation using data flow graphs for scalable machine learning (with CPU optimizations) + depends = c-ares + depends = intel-mkl + depends = onednn + depends = pybind11 + depends = openssl-1.0 + depends = lmdb + depends = libpng + depends = curl + depends = giflib + depends = icu + depends = libjpeg-turbo + depends = tensorflow-opt + depends = python-termcolor + depends = python-astor + depends = python-gast + depends = python-numpy + depends = python-protobuf + depends = absl-py + depends = python-h5py + depends = python-keras-applications + depends = python-keras-preprocessing + depends = python-tensorflow-estimator + depends = python-opt_einsum + depends = python-astunparse + depends = python-pasta + provides = python-tensorflow + conflicts = python-tensorflow + +pkgname = python-tensorflow-opt-native-cuda + pkgdesc = Library for computation using data flow graphs for scalable machine learning (with CUDA and CPU optimizations) + depends = c-ares + depends = intel-mkl + depends = onednn + depends = pybind11 + depends = openssl-1.0 + depends = lmdb + depends = libpng + depends = curl + depends = giflib + depends = icu + depends = libjpeg-turbo + depends = tensorflow-opt-native-cuda + depends = python-termcolor + depends = python-astor + depends = python-gast + depends = python-numpy + depends = cuda + depends = cudnn + depends = python-pycuda + depends = python-protobuf + depends = absl-py + depends = nccl + depends = python-h5py + depends = python-keras-applications + depends = python-keras-preprocessing + depends = python-tensorflow-estimator + depends = python-opt_einsum + depends = python-astunparse + depends = python-pasta + provides = python-tensorflow + provides = python-tensorflow-cuda + conflicts = python-tensorflow + diff --git a/PKGBUILD b/PKGBUILD new file mode 100644 index 000000000000..75ab30693e44 --- /dev/null +++ b/PKGBUILD @@ -0,0 +1,258 @@ +# Maintainer: Antoine Viallon <antoine@lesviallon.fr> +# Maintainer (repos): Sven-Hendrik Haase <svenstaro@gmail.com> +# Maintainer (repos): Konstantin Gizdov (kgizdov) <arch@kge.pw> +# Contributor: Adria Arrufat (archdria) <adria.arrufat+AUR@protonmail.ch> +# Contributor: Thibault Lorrain (fredszaq) <fredszaq@gmail.com> + +pkgbase=tensorflow-opt-native +pkgname=(tensorflow-opt-native tensorflow-opt-native-cuda python-tensorflow-opt-native python-tensorflow-opt-native-cuda) +pkgver=2.3.1 +_pkgver=2.3.1 +pkgrel=5 +pkgdesc="Library for computation using data flow graphs for scalable machine learning (with CFLAGS)" +url="https://www.tensorflow.org/" +license=('APACHE') +arch=('x86_64') +depends=('c-ares' 'intel-mkl' 'onednn' 'pybind11' 'openssl-1.0' 'lmdb' 'libpng' 'curl' 'giflib' 'icu' 'libjpeg-turbo') +makedepends=('bazel' 'python-numpy' 'cuda' 'nvidia-utils' 'nccl' 'git' 'gcc9' + 'cudnn' 'python-pip' 'python-wheel' 'python-setuptools' 'python-h5py' + 'python-keras-applications' 'python-keras-preprocessing' + 'cython') +optdepends=('tensorboard: Tensorflow visualization toolkit') +source=("$pkgname-$pkgver.tar.gz::https://github.com/tensorflow/tensorflow/archive/v${_pkgver}.tar.gz" + https://github.com/tensorflow/tensorflow/commit/c6769e20bf6096d5828e2590def2b25edb3189d6.patch + numpy1.20.patch::https://github.com/tensorflow/tensorflow/commit/75ea0b31477d6ba9e990e296bbbd8ca4e7eebadf.patch + cuda11.1.patch + fix-h5py3.0.patch + build-against-actual-mkl.patch) +sha512sums=('e497ef4564f50abf9f918be4522cf702f4cf945cb1ebf83af1386ac4ddc7373b3ba70c7f803f8ca06faf2c6b5396e60b1e0e9b97bfbd667e733b08b6e6d70ef0' + '9dcaef0dd4fdd7008a27e383ef87c97990ba883a3094f214f821a039994933ec6ec47f5a832570e5c4b783e0493ce2236e7957e596395c4dee40f9bf2621ff2f' + 'df2e0373e2f63b8766f31933f7db57f6a7559b8f03af1db51644fba87731451a7cd3895529a3192e5394612fcb42f245b794b1c9ca3c05881ca03a547c8c9acc' + '0caa5170c22fdea2c708ecdb2c980d668464d9dba9c9730a9ec5e9258572576d783fa1d19da04d5f9d2b06aed36d30971526f212fc64c53d09c3e821bd1a3b5d' + '9d7b71fed280ffaf4dfcd4889aa9ab5471874c153259f3e77ed6e6efa745e5c5aa8507d3d1f71dead5b6f4bea5f8b1c10c543929f37a6580c3f4a7cbec338a6a' + 'e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08') + +get_pyver () { + python -c 'import sys; print(str(sys.version_info[0]) + "." + str(sys.version_info[1]))' +} + +check_dir() { + if [ -d "${1}" ]; then + return 0 + else + >&2 echo Directory "${1}" does not exist or is a file! Exiting... + exit 1 + fi +} + +prepare() { + # Allow any bazel version + echo "*" > tensorflow-${_pkgver}/.bazelversion + + # Tensorflow actually wants to build against a slimmed down version of Intel MKL called MKLML + # See https://github.com/intel/mkl-dnn/issues/102 + # MKLML version that Tensorflow wants to use is https://github.com/intel/mkl-dnn/releases/tag/v0.21 + patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/build-against-actual-mkl.patch + + # Fix wrong SONAME being shipped in CUDA 11.1 + patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/cuda11.1.patch + + # Compile with C++17 by default (FS#65953) + #sed -i "s/c++14/c++17/g" tensorflow-${_pkgver}/.bazelrc + + patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/numpy1.20.patch + + # FS#68488 + patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix-h5py3.0.patch + + cp -r tensorflow-${_pkgver} tensorflow-${_pkgver}-opt-native + cp -r tensorflow-${_pkgver} tensorflow-${_pkgver}-opt-native-cuda + + # These environment variables influence the behavior of the configure call below. + export PYTHON_BIN_PATH=/usr/bin/python + export USE_DEFAULT_PYTHON_LIB_PATH=1 + export TF_NEED_JEMALLOC=1 + export TF_NEED_KAFKA=1 + export TF_NEED_OPENCL_SYCL=0 + export TF_NEED_AWS=1 + export TF_NEED_GCP=1 + export TF_NEED_HDFS=1 + export TF_NEED_S3=1 + export TF_ENABLE_XLA=1 + export TF_NEED_GDR=0 + export TF_NEED_VERBS=0 + export TF_NEED_OPENCL=0 + export TF_NEED_MPI=0 + export TF_NEED_TENSORRT=0 + export TF_NEED_NGRAPH=0 + export TF_NEED_IGNITE=0 + export TF_NEED_ROCM=0 + # See https://github.com/tensorflow/tensorflow/blob/master/third_party/systemlibs/syslibs_configure.bzl + export TF_SYSTEM_LIBS="boringssl,curl,cython,gif,icu,libjpeg_turbo,lmdb,nasm,pcre,png,pybind11,zlib" + export TF_SET_ANDROID_WORKSPACE=0 + export TF_DOWNLOAD_CLANG=0 + export TF_NCCL_VERSION=2.7 + export TF_IGNORE_MAX_BAZEL_VERSION=1 + export TF_MKL_ROOT=/opt/intel/mkl + export NCCL_INSTALL_PATH=/usr + export GCC_HOST_COMPILER_PATH=/usr/bin/gcc-9 + export HOST_C_COMPILER=/usr/bin/gcc-9 + export HOST_CXX_COMPILER=/usr/bin/g++-9 + export TF_CUDA_CLANG=0 # Clang currently disabled because it's not compatible at the moment. + export CLANG_CUDA_COMPILER_PATH=/usr/bin/clang + export TF_CUDA_PATHS=/opt/cuda,/usr/lib,/usr + export TF_CUDA_VERSION=$(/opt/cuda/bin/nvcc --version | sed -n 's/^.*release \(.*\),.*/\1/p') + export TF_CUDNN_VERSION=$(sed -n 's/^#define CUDNN_MAJOR\s*\(.*\).*/\1/p' /usr/include/cudnn_version.h) + if [[ -z "$COMPUTE_CAPABILITIES" ]]; then + COMPUTE_CAPABILITIES=5.2,5.3,6.0,6.1,6.2,7.0,7.2,7.5,8.0,8.6 + else + echo -e "\e[1mCUDA compute capabilites overriden by env: \e[0m${COMPUTE_CAPABILITIES}" + fi + export TF_CUDA_COMPUTE_CAPABILITIES=$COMPUTE_CAPABILITIES + + # Required until https://github.com/tensorflow/tensorflow/issues/39467 is fixed. + export CC=gcc-9 + export CXX=g++-9 + + export BAZEL_ARGS="--config=mkl -c opt --copt=-I/usr/include/openssl-1.0 --host_copt=-I/usr/include/openssl-1.0 --linkopt=-l:libssl.so.1.0.0 --linkopt=-l:libcrypto.so.1.0.0 --host_linkopt=-l:libssl.so.1.0.0 --host_linkopt=-l:libcrypto.so.1.0.0" +} + +build() { + echo "Building without cuda and with native optimizations" + cd "${srcdir}"/tensorflow-${_pkgver}-opt-native + export CC_OPT_FLAGS="$CFLAGS -O3 -funsafe-math-optimizations" + + _copts=() + for copt in $CC_OPT_FLAGS; do + _copts+=(--copt="$copt") + done + echo ${_copts} + echo ${_copts[@]} + + export TF_NEED_CUDA=0 + ./configure + bazel \ + build --config=avx2_linux \ + ${BAZEL_ARGS[@]} \ + ${_copts[@]} \ + //tensorflow:libtensorflow.so \ + //tensorflow:libtensorflow_cc.so \ + //tensorflow:install_headers \ + //tensorflow/tools/pip_package:build_pip_package + bazel-bin/tensorflow/tools/pip_package/build_pip_package "${srcdir}"/tmpopt + + + echo "Building with cuda and with non-x86-64 optimizations" + cd "${srcdir}"/tensorflow-${_pkgver}-opt-native-cuda + export CC_OPT_FLAGS="$CFLAGS -O3 -funsafe-math-optimizations" + export TF_NEED_CUDA=1 + export TF_CUDA_CLANG=0 + ./configure + bazel \ + build --config=avx2_linux \ + ${BAZEL_ARGS[@]} \ + ${_copts[@]} \ + //tensorflow:libtensorflow.so \ + //tensorflow:libtensorflow_cc.so \ + //tensorflow:install_headers \ + //tensorflow/tools/pip_package:build_pip_package + bazel-bin/tensorflow/tools/pip_package/build_pip_package --gpu "${srcdir}"/tmpoptcuda +} + +_package() { + # install headers first + install -d "${pkgdir}"/usr/include/tensorflow + cp -r bazel-bin/tensorflow/include/* "${pkgdir}"/usr/include/tensorflow/ + # install python-version to get all extra headers + WHEEL_PACKAGE=$(find "${srcdir}"/$1 -name "tensor*.whl") + pip install --ignore-installed --upgrade --root "${pkgdir}"/ $WHEEL_PACKAGE --no-dependencies + # move extra headers to correct location + local _srch_path="${pkgdir}/usr/lib/python$(get_pyver)"/site-packages/tensorflow/include + check_dir "${_srch_path}" # we need to quit on broken search paths + find "${_srch_path}" -maxdepth 1 -mindepth 1 -type d -print0 | while read -rd $'\0' _folder; do + cp -nr "${_folder}" "${pkgdir}"/usr/include/tensorflow/ + done + # clean up unneeded files + rm -rf "${pkgdir}"/usr/bin + rm -rf "${pkgdir}"/usr/lib + rm -rf "${pkgdir}"/usr/share + + # install the rest of tensorflow + tensorflow/c/generate-pc.sh --prefix=/usr --version=${pkgver} + sed -e 's@/include$@/include/tensorflow@' -i tensorflow.pc -i tensorflow_cc.pc + install -Dm644 tensorflow.pc "${pkgdir}"/usr/lib/pkgconfig/tensorflow.pc + install -Dm644 tensorflow_cc.pc "${pkgdir}"/usr/lib/pkgconfig/tensorflow_cc.pc + install -Dm755 bazel-bin/tensorflow/libtensorflow.so "${pkgdir}"/usr/lib/libtensorflow.so.${pkgver} + ln -s libtensorflow.so.${pkgver} "${pkgdir}"/usr/lib/libtensorflow.so.${pkgver:0:1} + ln -s libtensorflow.so.${pkgver:0:1} "${pkgdir}"/usr/lib/libtensorflow.so + install -Dm755 bazel-bin/tensorflow/libtensorflow_cc.so "${pkgdir}"/usr/lib/libtensorflow_cc.so.${pkgver} + ln -s libtensorflow_cc.so.${pkgver} "${pkgdir}"/usr/lib/libtensorflow_cc.so.${pkgver:0:1} + ln -s libtensorflow_cc.so.${pkgver:0:1} "${pkgdir}"/usr/lib/libtensorflow_cc.so + install -Dm755 bazel-bin/tensorflow/libtensorflow_framework.so "${pkgdir}"/usr/lib/libtensorflow_framework.so.${pkgver} + ln -s libtensorflow_framework.so.${pkgver} "${pkgdir}"/usr/lib/libtensorflow_framework.so.${pkgver:0:1} + ln -s libtensorflow_framework.so.${pkgver:0:1} "${pkgdir}"/usr/lib/libtensorflow_framework.so + install -Dm644 tensorflow/c/c_api.h "${pkgdir}"/usr/include/tensorflow/tensorflow/c/c_api.h + install -Dm644 LICENSE "${pkgdir}"/usr/share/licenses/${pkgname}/LICENSE +} + +_python_package() { + WHEEL_PACKAGE=$(find "${srcdir}"/$1 -name "tensor*.whl") + pip install --ignore-installed --upgrade --root "${pkgdir}"/ $WHEEL_PACKAGE --no-dependencies + + # create symlinks to headers + local _srch_path="${pkgdir}/usr/lib/python$(get_pyver)"/site-packages/tensorflow/include/ + check_dir "${_srch_path}" # we need to quit on broken search paths + find "${_srch_path}" -maxdepth 1 -mindepth 1 -type d -print0 | while read -rd $'\0' _folder; do + rm -rf "${_folder}" + _smlink="$(basename "${_folder}")" + ln -s /usr/include/tensorflow/"${_smlink}" "${_srch_path}" + done + + # tensorboard has been separated from upstream but they still install it with + # tensorflow. I don't know what kind of sense that makes but we have to clean + # it out from this pacakge. + rm -rf "${pkgdir}"/usr/bin/tensorboard + + install -Dm644 LICENSE "${pkgdir}"/usr/share/licenses/${pkgname}/LICENSE +} + +package_tensorflow-opt-native() { + pkgdesc="Library for computation using data flow graphs for scalable machine learning (with CPU optimizations)" + conflicts=(tensorflow) + provides=(tensorflow) + + cd "${srcdir}"/tensorflow-${_pkgver}-opt-native + _package tmpopt +} + +package_tensorflow-opt-native-cuda() { + pkgdesc="Library for computation using data flow graphs for scalable machine learning (with CUDA and CPU optimizations)" + depends+=(cuda cudnn nccl) + conflicts=(tensorflow) + provides=(tensorflow tensorflow-cuda) + + cd "${srcdir}"/tensorflow-${_pkgver}-opt-native-cuda + _package tmpoptcuda +} + +package_python-tensorflow-opt-native() { + pkgdesc="Library for computation using data flow graphs for scalable machine learning (with CPU optimizations)" + depends+=(tensorflow-opt python-termcolor python-astor python-gast python-numpy python-protobuf absl-py python-h5py python-keras-applications python-keras-preprocessing python-tensorflow-estimator python-opt_einsum python-astunparse python-pasta) + conflicts=(python-tensorflow) + provides=(python-tensorflow) + + cd "${srcdir}"/tensorflow-${_pkgver}-opt-native + _python_package tmpopt +} + +package_python-tensorflow-opt-native-cuda() { + pkgdesc="Library for computation using data flow graphs for scalable machine learning (with CUDA and CPU optimizations)" + depends+=(tensorflow-opt-native-cuda python-termcolor python-astor python-gast python-numpy cuda cudnn python-pycuda python-protobuf absl-py nccl python-h5py python-keras-applications python-keras-preprocessing python-tensorflow-estimator python-opt_einsum python-astunparse python-pasta) + conflicts=(python-tensorflow) + provides=(python-tensorflow python-tensorflow-cuda) + + cd "${srcdir}"/tensorflow-${_pkgver}-opt-native-cuda + _python_package tmpoptcuda +} + +# vim:set ts=2 sw=2 et: diff --git a/build-against-actual-mkl.patch b/build-against-actual-mkl.patch new file mode 100644 index 000000000000..c1589b568751 --- /dev/null +++ b/build-against-actual-mkl.patch @@ -0,0 +1,37 @@ +diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl +index 4b8fb83eb0..f4e1adfb22 100644 +--- a/third_party/mkl/build_defs.bzl ++++ b/third_party/mkl/build_defs.bzl +@@ -124,7 +124,7 @@ def _mkl_autoconf_impl(repository_ctx): + if _enable_local_mkl(repository_ctx): + # Symlink lib and include local folders. + mkl_root = repository_ctx.os.environ[_TF_MKL_ROOT] +- mkl_lib_path = "%s/lib" % mkl_root ++ mkl_lib_path = "%s/lib/intel64" % mkl_root + repository_ctx.symlink(mkl_lib_path, "lib") + mkl_include_path = "%s/include" % mkl_root + repository_ctx.symlink(mkl_include_path, "include") +diff --git a/third_party/mkl/mkl.BUILD b/third_party/mkl/mkl.BUILD +index 72370182c4..4972bb005e 100644 +--- a/third_party/mkl/mkl.BUILD ++++ b/third_party/mkl/mkl.BUILD +@@ -5,7 +5,6 @@ exports_files(["license.txt"]) + filegroup( + name = "LICENSE", + srcs = [ +- "license.txt", + ], + visibility = ["//visibility:public"], + ) +@@ -21,7 +20,10 @@ cc_library( + name = "mkl_libs_linux", + srcs = [ + "lib/libiomp5.so", +- "lib/libmklml_intel.so", ++ "lib/libmkl_core.so", ++ "lib/libmkl_rt.so", ++ "lib/libmkl_intel_thread.so", ++ "lib/libmkl_intel_lp64.so", + ], + visibility = ["//visibility:public"], + ) diff --git a/c6769e20bf6096d5828e2590def2b25edb3189d6.patch b/c6769e20bf6096d5828e2590def2b25edb3189d6.patch new file mode 100644 index 000000000000..73e95908867f --- /dev/null +++ b/c6769e20bf6096d5828e2590def2b25edb3189d6.patch @@ -0,0 +1,251 @@ +From c6769e20bf6096d5828e2590def2b25edb3189d6 Mon Sep 17 00:00:00 2001 +From: Christian Sigg <csigg@google.com> +Date: Mon, 17 Aug 2020 14:12:02 -0700 +Subject: [PATCH] Use CUB from the CUDA Toolkit starting with version 11.0. + +PiperOrigin-RevId: 327096097 +Change-Id: I444ec3ac3348f76728c931a4bb4aa1b7cbe1b673 +--- + tensorflow/core/kernels/BUILD | 8 ++--- + tensorflow/core/kernels/gpu_prim.h | 26 +++++++------- + tensorflow/core/util/BUILD | 2 +- + third_party/cub.BUILD | 1 - + third_party/cub.pr170.patch | 48 ------------------------- + third_party/gpus/cuda/BUILD.tpl | 6 ++++ + third_party/gpus/cuda/BUILD.windows.tpl | 5 +++ + third_party/gpus/cuda_configure.bzl | 7 ++++ + 8 files changed, 36 insertions(+), 67 deletions(-) + delete mode 100644 third_party/cub.pr170.patch + +diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD +index 88958cdaa9878..19dc5c73252a8 100644 +--- a/tensorflow/core/kernels/BUILD ++++ b/tensorflow/core/kernels/BUILD +@@ -490,7 +490,7 @@ cc_library( + name = "gpu_prim_hdrs", + hdrs = ["gpu_prim.h"], + deps = if_cuda([ +- "@cub_archive//:cub", ++ "@local_config_cuda//cuda:cub_headers", + ]) + if_rocm([ + "@local_config_rocm//rocm:rocprim", + ]), +@@ -3896,7 +3896,7 @@ tf_kernel_library( + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ] + if_cuda([ +- "@cub_archive//:cub", ++ "@local_config_cuda//cuda:cub_headers", + "@local_config_cuda//cuda:cudnn_header", + ]) + if_rocm([ + "@local_config_rocm//rocm:rocprim", +@@ -3986,7 +3986,7 @@ tf_kernel_library( + ] + if_cuda_or_rocm([ + ":reduction_ops", + ]) + if_cuda([ +- "@cub_archive//:cub", ++ "@local_config_cuda//cuda:cub_headers", + "//tensorflow/core:stream_executor", + "//tensorflow/stream_executor/cuda:cuda_stream", + ]) + if_rocm([ +@@ -4708,7 +4708,7 @@ tf_kernel_library( + ] + if_cuda_or_rocm([ + ":reduction_ops", + ]) + if_cuda([ +- "@cub_archive//:cub", ++ "@local_config_cuda//cuda:cub_headers", + ]) + if_rocm([ + "@local_config_rocm//rocm:rocprim", + ]), +diff --git a/tensorflow/core/kernels/gpu_prim.h b/tensorflow/core/kernels/gpu_prim.h +index 82fcb21e0ac04..33c5df1ae2371 100644 +--- a/tensorflow/core/kernels/gpu_prim.h ++++ b/tensorflow/core/kernels/gpu_prim.h +@@ -15,19 +15,19 @@ limitations under the license, the license you must see. + #define TENSORFLOW_CORE_KERNELS_GPU_PRIM_H_ + + #if GOOGLE_CUDA +-#include "third_party/cub/block/block_load.cuh" +-#include "third_party/cub/block/block_scan.cuh" +-#include "third_party/cub/block/block_store.cuh" +-#include "third_party/cub/device/device_histogram.cuh" +-#include "third_party/cub/device/device_radix_sort.cuh" +-#include "third_party/cub/device/device_reduce.cuh" +-#include "third_party/cub/device/device_segmented_radix_sort.cuh" +-#include "third_party/cub/device/device_segmented_reduce.cuh" +-#include "third_party/cub/device/device_select.cuh" +-#include "third_party/cub/iterator/counting_input_iterator.cuh" +-#include "third_party/cub/iterator/transform_input_iterator.cuh" +-#include "third_party/cub/thread/thread_operators.cuh" +-#include "third_party/cub/warp/warp_reduce.cuh" ++#include "cub/block/block_load.cuh" ++#include "cub/block/block_scan.cuh" ++#include "cub/block/block_store.cuh" ++#include "cub/device/device_histogram.cuh" ++#include "cub/device/device_radix_sort.cuh" ++#include "cub/device/device_reduce.cuh" ++#include "cub/device/device_segmented_radix_sort.cuh" ++#include "cub/device/device_segmented_reduce.cuh" ++#include "cub/device/device_select.cuh" ++#include "cub/iterator/counting_input_iterator.cuh" ++#include "cub/iterator/transform_input_iterator.cuh" ++#include "cub/thread/thread_operators.cuh" ++#include "cub/warp/warp_reduce.cuh" + #include "third_party/gpus/cuda/include/cusparse.h" + + namespace gpuprim = ::cub; +diff --git a/tensorflow/core/util/BUILD b/tensorflow/core/util/BUILD +index 4d2ff9a805811..241e382a650ba 100644 +--- a/tensorflow/core/util/BUILD ++++ b/tensorflow/core/util/BUILD +@@ -626,7 +626,7 @@ tf_kernel_library( + "//tensorflow/core:lib", + ] + if_cuda([ + "//tensorflow/stream_executor/cuda:cusparse_lib", +- "@cub_archive//:cub", ++ "@local_config_cuda//cuda:cub_headers", + ]) + if_rocm([ + "@local_config_rocm//rocm:hipsparse", + ]), +diff --git a/third_party/cub.BUILD b/third_party/cub.BUILD +index a04347b21eefb..29159c9dad3d3 100644 +--- a/third_party/cub.BUILD ++++ b/third_party/cub.BUILD +@@ -20,7 +20,6 @@ filegroup( + cc_library( + name = "cub", + hdrs = if_cuda([":cub_header_files"]), +- include_prefix = "third_party", + deps = [ + "@local_config_cuda//cuda:cuda_headers", + ], +diff --git a/third_party/cub.pr170.patch b/third_party/cub.pr170.patch +deleted file mode 100644 +index 5b7432e885867..0000000000000 +--- a/third_party/cub.pr170.patch ++++ /dev/null +@@ -1,48 +0,0 @@ +-From fd6e7a61a16a17fa155cbd717de0c79001af71e6 Mon Sep 17 00:00:00 2001 +-From: Artem Belevich <tra@google.com> +-Date: Mon, 23 Sep 2019 11:18:56 -0700 +-Subject: [PATCH] Fix CUDA version detection in CUB +- +-This fixes the problem with CUB using deprecated shfl/vote instructions when CUB +-is compiled with clang (e.g. some TensorFlow builds). +---- +- cub/util_arch.cuh | 3 ++- +- cub/util_type.cuh | 4 ++-- +- 2 files changed, 4 insertions(+), 3 deletions(-) +- +-diff --git a/cub/util_arch.cuh b/cub/util_arch.cuh +-index 87c5ea2fb..9ad9d1cbb 100644 +---- a/cub/util_arch.cuh +-+++ b/cub/util_arch.cuh +-@@ -44,7 +44,8 @@ namespace cub { +- +- #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document +- +--#if (__CUDACC_VER_MAJOR__ >= 9) && !defined(CUB_USE_COOPERATIVE_GROUPS) +-+#if !defined(CUB_USE_COOPERATIVE_GROUPS) && \ +-+ (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000) +- #define CUB_USE_COOPERATIVE_GROUPS +- #endif +- +-diff --git a/cub/util_type.cuh b/cub/util_type.cuh +-index 0ba41e1ed..b2433d735 100644 +---- a/cub/util_type.cuh +-+++ b/cub/util_type.cuh +-@@ -37,7 +37,7 @@ +- #include <limits> +- #include <cfloat> +- +--#if (__CUDACC_VER_MAJOR__ >= 9) +-+#if (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000) +- #include <cuda_fp16.h> +- #endif +- +-@@ -1063,7 +1063,7 @@ struct FpLimits<double> +- }; +- +- +--#if (__CUDACC_VER_MAJOR__ >= 9) +-+#if (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000) +- template <> +- struct FpLimits<__half> +- { +diff --git a/third_party/gpus/cuda/BUILD.tpl b/third_party/gpus/cuda/BUILD.tpl +index e5833e7cdbbc2..a4a21abc36769 100644 +--- a/third_party/gpus/cuda/BUILD.tpl ++++ b/third_party/gpus/cuda/BUILD.tpl +@@ -176,6 +176,11 @@ cc_library( + ], + ) + ++alias( ++ name = "cub_headers", ++ actual = "%{cub_actual}" ++) ++ + cuda_header_library( + name = "cupti_headers", + hdrs = [":cuda-extras"], +@@ -224,3 +229,4 @@ py_library( + ) + + %{copy_rules} ++ +diff --git a/third_party/gpus/cuda/BUILD.windows.tpl b/third_party/gpus/cuda/BUILD.windows.tpl +index 55a9ec3d1ab10..cabfac28fc357 100644 +--- a/third_party/gpus/cuda/BUILD.windows.tpl ++++ b/third_party/gpus/cuda/BUILD.windows.tpl +@@ -171,6 +171,11 @@ cc_library( + ], + ) + ++alias( ++ name = "cub_headers", ++ actual = "%{cub_actual}" ++) ++ + cuda_header_library( + name = "cupti_headers", + hdrs = [":cuda-extras"], +diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl +index 70bb91159de1a..ea33963fe19fb 100644 +--- a/third_party/gpus/cuda_configure.bzl ++++ b/third_party/gpus/cuda_configure.bzl +@@ -692,6 +692,7 @@ def _get_cuda_config(repository_ctx, find_cuda_config_script): + return struct( + cuda_toolkit_path = toolkit_path, + cuda_version = cuda_version, ++ cuda_version_major = cuda_major, + cublas_version = cublas_version, + cusolver_version = cusolver_version, + curand_version = curand_version, +@@ -776,6 +777,7 @@ def _create_dummy_repository(repository_ctx): + "%{curand_lib}": lib_name("curand", cpu_value), + "%{cupti_lib}": lib_name("cupti", cpu_value), + "%{cusparse_lib}": lib_name("cusparse", cpu_value), ++ "%{cub_actual}": ":cuda_headers", + "%{copy_rules}": """ + filegroup(name="cuda-include") + filegroup(name="cublas-include") +@@ -1122,6 +1124,10 @@ def _create_local_cuda_repository(repository_ctx): + }, + ) + ++ cub_actual = "@cub_archive//:cub" ++ if int(cuda_config.cuda_version_major) >= 11: ++ cub_actual = ":cuda_headers" ++ + repository_ctx.template( + "cuda/BUILD", + tpl_paths["cuda:BUILD"], +@@ -1137,6 +1143,7 @@ def _create_local_cuda_repository(repository_ctx): + "%{curand_lib}": _basename(repository_ctx, cuda_libs["curand"]), + "%{cupti_lib}": _basename(repository_ctx, cuda_libs["cupti"]), + "%{cusparse_lib}": _basename(repository_ctx, cuda_libs["cusparse"]), ++ "%{cub_actual}": cub_actual, + "%{copy_rules}": "\n".join(copy_rules), + }, + ) diff --git a/cuda11.1.patch b/cuda11.1.patch new file mode 100644 index 000000000000..e4e9dd769a36 --- /dev/null +++ b/cuda11.1.patch @@ -0,0 +1,136 @@ +From 4a64bbe4ff9fb03a948ee76f7349cfdb9e9b7528 Mon Sep 17 00:00:00 2001 +From: Nathan Luehr <nluehr@nvidia.com> +Date: Thu, 13 Aug 2020 09:46:43 -0700 +Subject: [PATCH 1/2] Fix cudart 11.1 soname + +--- + third_party/gpus/cuda_configure.bzl | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl +index ea33963fe19fb..3e6bdc9d8eb22 100644 +--- a/third_party/gpus/cuda_configure.bzl ++++ b/third_party/gpus/cuda_configure.bzl +@@ -534,14 +534,14 @@ def _find_libs(repository_ctx, check_cuda_libs_script, cuda_config): + "cudart", + cpu_value, + cuda_config.config["cuda_library_dir"], +- cuda_config.cuda_version, ++ cuda_config.cudart_version, + static = False, + ), + "cudart_static": _check_cuda_lib_params( + "cudart_static", + cpu_value, + cuda_config.config["cuda_library_dir"], +- cuda_config.cuda_version, ++ cuda_config.cudart_version, + static = True, + ), + "cublas": _check_cuda_lib_params( +@@ -651,6 +651,7 @@ def _get_cuda_config(repository_ctx, find_cuda_config_script): + cuda_toolkit_path: The CUDA toolkit installation directory. + cudnn_install_basedir: The cuDNN installation directory. + cuda_version: The version of CUDA on the system. ++ cudart_version: The CUDA runtime version on the system. + cudnn_version: The version of cuDNN on the system. + compute_capabilities: A list of the system's CUDA compute capabilities. + cpu_value: The name of the host operating system. +@@ -668,6 +669,10 @@ def _get_cuda_config(repository_ctx, find_cuda_config_script): + cudnn_version = ("64_%s" if is_windows else "%s") % config["cudnn_version"] + + if int(cuda_major) >= 11: ++ if int(cuda_major) == 11: ++ cudart_version = "64_110" if is_windows else "11.0" ++ else: ++ cudart_version = ("64_%s" if is_windows else "%s") % cuda_major + cublas_version = ("64_%s" if is_windows else "%s") % config["cublas_version"].split(".")[0] + cusolver_version = ("64_%s" if is_windows else "%s") % config["cusolver_version"].split(".")[0] + curand_version = ("64_%s" if is_windows else "%s") % config["curand_version"].split(".")[0] +@@ -677,12 +682,14 @@ def _get_cuda_config(repository_ctx, find_cuda_config_script): + # cuda_lib_version is for libraries like cuBLAS, cuFFT, cuSOLVER, etc. + # It changed from 'x.y' to just 'x' in CUDA 10.1. + cuda_lib_version = ("64_%s" if is_windows else "%s") % cuda_major ++ cudart_version = cuda_version + cublas_version = cuda_lib_version + cusolver_version = cuda_lib_version + curand_version = cuda_lib_version + cufft_version = cuda_lib_version + cusparse_version = cuda_lib_version + else: ++ cudart_version = cuda_version + cublas_version = cuda_version + cusolver_version = cuda_version + curand_version = cuda_version +@@ -693,6 +700,7 @@ def _get_cuda_config(repository_ctx, find_cuda_config_script): + cuda_toolkit_path = toolkit_path, + cuda_version = cuda_version, ++ cudart_version = cudart_version, + cublas_version = cublas_version, + cusolver_version = cusolver_version, + curand_version = curand_version, + +From 2642e93e6cbb7a3a1e916abf1ab8e18fa2735237 Mon Sep 17 00:00:00 2001 +From: Nathan Luehr <nluehr@nvidia.com> +Date: Fri, 14 Aug 2020 13:21:58 -0700 +Subject: [PATCH 2/2] Use correct cudart soname in GetDsoHandle + +--- + tensorflow/stream_executor/platform/default/dso_loader.cc | 3 ++- + third_party/gpus/cuda/cuda_config.h.tpl | 1 + + third_party/gpus/cuda_configure.bzl | 2 ++ + 3 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/tensorflow/stream_executor/platform/default/dso_loader.cc b/tensorflow/stream_executor/platform/default/dso_loader.cc +index 84293b7767a20..a78c738f32c2a 100644 +--- a/tensorflow/stream_executor/platform/default/dso_loader.cc ++++ b/tensorflow/stream_executor/platform/default/dso_loader.cc +@@ -31,6 +31,7 @@ namespace internal { + + namespace { + string GetCudaVersion() { return TF_CUDA_VERSION; } ++string GetCudaRtVersion() { return TF_CUDART_VERSION; } + string GetCudnnVersion() { return TF_CUDNN_VERSION; } + string GetCublasVersion() { return TF_CUBLAS_VERSION; } + string GetCusolverVersion() { return TF_CUSOLVER_VERSION; } +@@ -77,7 +78,7 @@ port::StatusOr<void*> GetCudaDriverDsoHandle() { + } + + port::StatusOr<void*> GetCudaRuntimeDsoHandle() { +- return GetDsoHandle("cudart", GetCudaVersion()); ++ return GetDsoHandle("cudart", GetCudaRtVersion()); + } + + port::StatusOr<void*> GetCublasDsoHandle() { +diff --git a/third_party/gpus/cuda/cuda_config.h.tpl b/third_party/gpus/cuda/cuda_config.h.tpl +index b59889938b1a9..ab26686ccb8b2 100644 +--- a/third_party/gpus/cuda/cuda_config.h.tpl ++++ b/third_party/gpus/cuda/cuda_config.h.tpl +@@ -17,6 +17,7 @@ limitations under the License. + #define CUDA_CUDA_CONFIG_H_ + + #define TF_CUDA_VERSION "%{cuda_version}" ++#define TF_CUDART_VERSION "%{cudart_version}" + #define TF_CUBLAS_VERSION "%{cublas_version}" + #define TF_CUSOLVER_VERSION "%{cusolver_version}" + #define TF_CURAND_VERSION "%{curand_version}" +diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl +index 3e6bdc9d8eb22..f85a53b1593b4 100644 +--- a/third_party/gpus/cuda_configure.bzl ++++ b/third_party/gpus/cuda_configure.bzl +@@ -824,6 +824,7 @@ filegroup(name="cudnn-include") + "cuda:cuda_config.h", + { + "%{cuda_version}": "", ++ "%{cudart_version}": "", + "%{cublas_version}": "", + "%{cusolver_version}": "", + "%{curand_version}": "", +@@ -1289,6 +1290,7 @@ def _create_local_cuda_repository(repository_ctx): + tpl_paths["cuda:cuda_config.h"], + { + "%{cuda_version}": cuda_config.cuda_version, ++ "%{cudart_version}": cuda_config.cudart_version, + "%{cublas_version}": cuda_config.cublas_version, + "%{cusolver_version}": cuda_config.cusolver_version, + "%{curand_version}": cuda_config.curand_version, diff --git a/fix-h5py3.0.patch b/fix-h5py3.0.patch new file mode 100644 index 000000000000..18e55a5297a4 --- /dev/null +++ b/fix-h5py3.0.patch @@ -0,0 +1,18 @@ +diff --git a/tensorflow/python/keras/saving/hdf5_format.py b/tensorflow/python/keras/saving/hdf5_format.py +index d3bb10c98d..e89f5356bb 100644 +--- a/tensorflow/python/keras/saving/hdf5_format.py ++++ b/tensorflow/python/keras/saving/hdf5_format.py +@@ -659,11 +659,11 @@ def load_weights_from_hdf5_group(f, layers): + and weights file. + """ + if 'keras_version' in f.attrs: +- original_keras_version = f.attrs['keras_version'].decode('utf8') ++ original_keras_version = f.attrs['keras_version'] + else: + original_keras_version = '1' + if 'backend' in f.attrs: +- original_backend = f.attrs['backend'].decode('utf8') ++ original_backend = f.attrs['backend'] + else: + original_backend = None + diff --git a/numpy1.20.patch b/numpy1.20.patch new file mode 100644 index 000000000000..5198d2d7997a --- /dev/null +++ b/numpy1.20.patch @@ -0,0 +1,45 @@ +From 75ea0b31477d6ba9e990e296bbbd8ca4e7eebadf Mon Sep 17 00:00:00 2001 +From: Christian Sigg <csigg@google.com> +Date: Fri, 26 Jun 2020 05:08:10 -0700 +Subject: [PATCH] Provide overload to cope with const-ness change of NumPy's + PyUFuncGenericFunction. + +See https://github.com/tensorflow/tensorflow/issues/40688, https://github.com/tensorflow/tensorflow/pull/40654. + +PiperOrigin-RevId: 318452381 +Change-Id: Icc5152f2b020ef19882a49e3c86ac80bbe048d64 +--- + tensorflow/python/lib/core/bfloat16.cc | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/tensorflow/python/lib/core/bfloat16.cc b/tensorflow/python/lib/core/bfloat16.cc +index feb01f11a1af2..bb6b720febe59 100644 +--- a/tensorflow/python/lib/core/bfloat16.cc ++++ b/tensorflow/python/lib/core/bfloat16.cc +@@ -517,7 +517,7 @@ bool RegisterBfloat16Cast(int numpy_type, bool cast_is_safe) { + } + + template <typename InType, typename OutType, typename Functor> +-void BinaryUFunc(char** args, npy_intp* dimensions, npy_intp* steps, ++void BinaryUFunc(char** args, const npy_intp* dimensions, const npy_intp* steps, + void* data) { + const char* i0 = args[0]; + const char* i1 = args[1]; +@@ -532,11 +532,17 @@ void BinaryUFunc(char** args, npy_intp* dimensions, npy_intp* steps, + } + } + ++// Numpy changed const-ness of PyUFuncGenericFunction, provide overload. + template <typename Functor> + void CompareUFunc(char** args, npy_intp* dimensions, npy_intp* steps, + void* data) { + BinaryUFunc<bfloat16, npy_bool, Functor>(args, dimensions, steps, data); + } ++template <typename Functor> ++void CompareUFunc(char** args, const npy_intp* dimensions, ++ const npy_intp* steps, void* data) { ++ BinaryUFunc<bfloat16, npy_bool, Functor>(args, dimensions, steps, data); ++} + + struct Bfloat16EqFunctor { + npy_bool operator()(bfloat16 a, bfloat16 b) { return a == b; } |