summarylogtreecommitdiffstats
diff options
context:
space:
mode:
authorAntoine Viallon2020-11-17 18:22:30 +0100
committerAntoine Viallon2020-11-17 18:22:30 +0100
commit409239779029b5fab891b4d4259d65b4bc5cd1c2 (patch)
treea6351a21a864ba8981cdbafdcacd5235a1c2998b
downloadaur-409239779029b5fab891b4d4259d65b4bc5cd1c2.tar.gz
Initial commit
-rw-r--r--.SRCINFO137
-rw-r--r--PKGBUILD258
-rw-r--r--build-against-actual-mkl.patch37
-rw-r--r--c6769e20bf6096d5828e2590def2b25edb3189d6.patch251
-rw-r--r--cuda11.1.patch136
-rw-r--r--fix-h5py3.0.patch18
-rw-r--r--numpy1.20.patch45
7 files changed, 882 insertions, 0 deletions
diff --git a/.SRCINFO b/.SRCINFO
new file mode 100644
index 000000000000..aa97cd75998d
--- /dev/null
+++ b/.SRCINFO
@@ -0,0 +1,137 @@
+pkgbase = tensorflow-opt-native
+ pkgdesc = Library for computation using data flow graphs for scalable machine learning (with CFLAGS)
+ pkgver = 2.3.1
+ pkgrel = 5
+ url = https://www.tensorflow.org/
+ arch = x86_64
+ license = APACHE
+ makedepends = bazel
+ makedepends = python-numpy
+ makedepends = cuda
+ makedepends = nvidia-utils
+ makedepends = nccl
+ makedepends = git
+ makedepends = gcc9
+ makedepends = cudnn
+ makedepends = python-pip
+ makedepends = python-wheel
+ makedepends = python-setuptools
+ makedepends = python-h5py
+ makedepends = python-keras-applications
+ makedepends = python-keras-preprocessing
+ makedepends = cython
+ depends = c-ares
+ depends = intel-mkl
+ depends = onednn
+ depends = pybind11
+ depends = openssl-1.0
+ depends = lmdb
+ depends = libpng
+ depends = curl
+ depends = giflib
+ depends = icu
+ depends = libjpeg-turbo
+ optdepends = tensorboard: Tensorflow visualization toolkit
+ source = tensorflow-opt-native-2.3.1.tar.gz::https://github.com/tensorflow/tensorflow/archive/v2.3.1.tar.gz
+ source = https://github.com/tensorflow/tensorflow/commit/c6769e20bf6096d5828e2590def2b25edb3189d6.patch
+ source = numpy1.20.patch::https://github.com/tensorflow/tensorflow/commit/75ea0b31477d6ba9e990e296bbbd8ca4e7eebadf.patch
+ source = cuda11.1.patch
+ source = fix-h5py3.0.patch
+ source = build-against-actual-mkl.patch
+ sha512sums = e497ef4564f50abf9f918be4522cf702f4cf945cb1ebf83af1386ac4ddc7373b3ba70c7f803f8ca06faf2c6b5396e60b1e0e9b97bfbd667e733b08b6e6d70ef0
+ sha512sums = 9dcaef0dd4fdd7008a27e383ef87c97990ba883a3094f214f821a039994933ec6ec47f5a832570e5c4b783e0493ce2236e7957e596395c4dee40f9bf2621ff2f
+ sha512sums = df2e0373e2f63b8766f31933f7db57f6a7559b8f03af1db51644fba87731451a7cd3895529a3192e5394612fcb42f245b794b1c9ca3c05881ca03a547c8c9acc
+ sha512sums = 0caa5170c22fdea2c708ecdb2c980d668464d9dba9c9730a9ec5e9258572576d783fa1d19da04d5f9d2b06aed36d30971526f212fc64c53d09c3e821bd1a3b5d
+ sha512sums = 9d7b71fed280ffaf4dfcd4889aa9ab5471874c153259f3e77ed6e6efa745e5c5aa8507d3d1f71dead5b6f4bea5f8b1c10c543929f37a6580c3f4a7cbec338a6a
+ sha512sums = e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08
+
+pkgname = tensorflow-opt-native
+ pkgdesc = Library for computation using data flow graphs for scalable machine learning (with CPU optimizations)
+ provides = tensorflow
+ conflicts = tensorflow
+
+pkgname = tensorflow-opt-native-cuda
+ pkgdesc = Library for computation using data flow graphs for scalable machine learning (with CUDA and CPU optimizations)
+ depends = c-ares
+ depends = intel-mkl
+ depends = onednn
+ depends = pybind11
+ depends = openssl-1.0
+ depends = lmdb
+ depends = libpng
+ depends = curl
+ depends = giflib
+ depends = icu
+ depends = libjpeg-turbo
+ depends = cuda
+ depends = cudnn
+ depends = nccl
+ provides = tensorflow
+ provides = tensorflow-cuda
+ conflicts = tensorflow
+
+pkgname = python-tensorflow-opt-native
+ pkgdesc = Library for computation using data flow graphs for scalable machine learning (with CPU optimizations)
+ depends = c-ares
+ depends = intel-mkl
+ depends = onednn
+ depends = pybind11
+ depends = openssl-1.0
+ depends = lmdb
+ depends = libpng
+ depends = curl
+ depends = giflib
+ depends = icu
+ depends = libjpeg-turbo
+ depends = tensorflow-opt-native
+ depends = python-termcolor
+ depends = python-astor
+ depends = python-gast
+ depends = python-numpy
+ depends = python-protobuf
+ depends = absl-py
+ depends = python-h5py
+ depends = python-keras-applications
+ depends = python-keras-preprocessing
+ depends = python-tensorflow-estimator
+ depends = python-opt_einsum
+ depends = python-astunparse
+ depends = python-pasta
+ provides = python-tensorflow
+ conflicts = python-tensorflow
+
+pkgname = python-tensorflow-opt-native-cuda
+ pkgdesc = Library for computation using data flow graphs for scalable machine learning (with CUDA and CPU optimizations)
+ depends = c-ares
+ depends = intel-mkl
+ depends = onednn
+ depends = pybind11
+ depends = openssl-1.0
+ depends = lmdb
+ depends = libpng
+ depends = curl
+ depends = giflib
+ depends = icu
+ depends = libjpeg-turbo
+ depends = tensorflow-opt-native-cuda
+ depends = python-termcolor
+ depends = python-astor
+ depends = python-gast
+ depends = python-numpy
+ depends = cuda
+ depends = cudnn
+ depends = python-pycuda
+ depends = python-protobuf
+ depends = absl-py
+ depends = nccl
+ depends = python-h5py
+ depends = python-keras-applications
+ depends = python-keras-preprocessing
+ depends = python-tensorflow-estimator
+ depends = python-opt_einsum
+ depends = python-astunparse
+ depends = python-pasta
+ provides = python-tensorflow
+ provides = python-tensorflow-cuda
+ conflicts = python-tensorflow
+
diff --git a/PKGBUILD b/PKGBUILD
new file mode 100644
index 000000000000..75ab30693e44
--- /dev/null
+++ b/PKGBUILD
@@ -0,0 +1,258 @@
+# Maintainer: Antoine Viallon <antoine@lesviallon.fr>
+# Maintainer (repos): Sven-Hendrik Haase <svenstaro@gmail.com>
+# Maintainer (repos): Konstantin Gizdov (kgizdov) <arch@kge.pw>
+# Contributor: Adria Arrufat (archdria) <adria.arrufat+AUR@protonmail.ch>
+# Contributor: Thibault Lorrain (fredszaq) <fredszaq@gmail.com>
+
+pkgbase=tensorflow-opt-native
+pkgname=(tensorflow-opt-native tensorflow-opt-native-cuda python-tensorflow-opt-native python-tensorflow-opt-native-cuda) # one package_<name>() function below per entry
+pkgver=2.3.1
+_pkgver=2.3.1 # upstream tag: used in the tarball URL and in the source directory names
+pkgrel=5
+pkgdesc="Library for computation using data flow graphs for scalable machine learning (with CFLAGS)"
+url="https://www.tensorflow.org/"
+license=('APACHE')
+arch=('x86_64')
+depends=('c-ares' 'intel-mkl' 'onednn' 'pybind11' 'openssl-1.0' 'lmdb' 'libpng' 'curl' 'giflib' 'icu' 'libjpeg-turbo') # shared base list; several package_*() functions extend it with depends+=
+makedepends=('bazel' 'python-numpy' 'cuda' 'nvidia-utils' 'nccl' 'git' 'gcc9'
+ 'cudnn' 'python-pip' 'python-wheel' 'python-setuptools' 'python-h5py'
+ 'python-keras-applications' 'python-keras-preprocessing'
+ 'cython') # gcc9: prepare() pins CC/CXX and the CUDA host compiler to gcc-9/g++-9
+optdepends=('tensorboard: Tensorflow visualization toolkit')
+source=("$pkgname-$pkgver.tar.gz::https://github.com/tensorflow/tensorflow/archive/v${_pkgver}.tar.gz"
+ https://github.com/tensorflow/tensorflow/commit/c6769e20bf6096d5828e2590def2b25edb3189d6.patch
+ numpy1.20.patch::https://github.com/tensorflow/tensorflow/commit/75ea0b31477d6ba9e990e296bbbd8ca4e7eebadf.patch
+ cuda11.1.patch
+ fix-h5py3.0.patch
+ build-against-actual-mkl.patch)
+sha512sums=('e497ef4564f50abf9f918be4522cf702f4cf945cb1ebf83af1386ac4ddc7373b3ba70c7f803f8ca06faf2c6b5396e60b1e0e9b97bfbd667e733b08b6e6d70ef0'
+ '9dcaef0dd4fdd7008a27e383ef87c97990ba883a3094f214f821a039994933ec6ec47f5a832570e5c4b783e0493ce2236e7957e596395c4dee40f9bf2621ff2f'
+ 'df2e0373e2f63b8766f31933f7db57f6a7559b8f03af1db51644fba87731451a7cd3895529a3192e5394612fcb42f245b794b1c9ca3c05881ca03a547c8c9acc'
+ '0caa5170c22fdea2c708ecdb2c980d668464d9dba9c9730a9ec5e9258572576d783fa1d19da04d5f9d2b06aed36d30971526f212fc64c53d09c3e821bd1a3b5d'
+ '9d7b71fed280ffaf4dfcd4889aa9ab5471874c153259f3e77ed6e6efa745e5c5aa8507d3d1f71dead5b6f4bea5f8b1c10c543929f37a6580c3f4a7cbec338a6a'
+ 'e51e3f3dced121db3a09fbdaefd33555536095584b72a5eb6f302fa6fa68ab56ea45e8a847ec90ff4ba076db312c06f91ff672e08e95263c658526582494ce08')
+
+get_pyver () {  # prints "<major>.<minor>" of the interpreter found as `python`
+  python -c 'import sys; print("{0}.{1}".format(*sys.version_info[:2]))'
+}
+
+check_dir() {
+  # Returns 0 when $1 is an existing directory; otherwise complains on stderr
+  # and terminates the shell (exit, not return) so packaging cannot continue.
+  if [ ! -d "${1}" ]; then
+    echo Directory "${1}" does not exist or is a file! Exiting... >&2
+    exit 1
+  fi
+  return 0
+}
+
+prepare() { # Patches the extracted upstream tree, copies it once per build flavour, and exports the configure/bazel environment
+ # Allow any bazel version
+ echo "*" > tensorflow-${_pkgver}/.bazelversion
+
+ # Tensorflow actually wants to build against a slimmed down version of Intel MKL called MKLML
+ # See https://github.com/intel/mkl-dnn/issues/102
+ # MKLML version that Tensorflow wants to use is https://github.com/intel/mkl-dnn/releases/tag/v0.21
+ patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/build-against-actual-mkl.patch
+
+ # Fix wrong SONAME being shipped in CUDA 11.1
+ patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/cuda11.1.patch
+
+ # Compile with C++17 by default (FS#65953) -- currently disabled, the sed below is commented out
+ #sed -i "s/c++14/c++17/g" tensorflow-${_pkgver}/.bazelrc
+
+ patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/numpy1.20.patch # upstream commit 75ea0b3 (see source=())
+
+ # FS#68488 (h5py 3.0 compatibility, going by the patch name)
+ patch -Np1 -d tensorflow-${_pkgver} -i "$srcdir"/fix-h5py3.0.patch
+ # NOTE(review): c6769e20bf6096d5828e2590def2b25edb3189d6.patch is listed in source=() but no patch command here applies it -- confirm this is intentional
+ cp -r tensorflow-${_pkgver} tensorflow-${_pkgver}-opt-native # separate trees: build() runs ./configure in each one
+ cp -r tensorflow-${_pkgver} tensorflow-${_pkgver}-opt-native-cuda
+
+ # These environment variables influence the behavior of the configure call below.
+ export PYTHON_BIN_PATH=/usr/bin/python
+ export USE_DEFAULT_PYTHON_LIB_PATH=1
+ export TF_NEED_JEMALLOC=1
+ export TF_NEED_KAFKA=1
+ export TF_NEED_OPENCL_SYCL=0
+ export TF_NEED_AWS=1
+ export TF_NEED_GCP=1
+ export TF_NEED_HDFS=1
+ export TF_NEED_S3=1
+ export TF_ENABLE_XLA=1
+ export TF_NEED_GDR=0
+ export TF_NEED_VERBS=0
+ export TF_NEED_OPENCL=0
+ export TF_NEED_MPI=0
+ export TF_NEED_TENSORRT=0
+ export TF_NEED_NGRAPH=0
+ export TF_NEED_IGNITE=0
+ export TF_NEED_ROCM=0
+ # See https://github.com/tensorflow/tensorflow/blob/master/third_party/systemlibs/syslibs_configure.bzl
+ export TF_SYSTEM_LIBS="boringssl,curl,cython,gif,icu,libjpeg_turbo,lmdb,nasm,pcre,png,pybind11,zlib"
+ export TF_SET_ANDROID_WORKSPACE=0
+ export TF_DOWNLOAD_CLANG=0
+ export TF_NCCL_VERSION=2.7
+ export TF_IGNORE_MAX_BAZEL_VERSION=1
+ export TF_MKL_ROOT=/opt/intel/mkl
+ export NCCL_INSTALL_PATH=/usr
+ export GCC_HOST_COMPILER_PATH=/usr/bin/gcc-9
+ export HOST_C_COMPILER=/usr/bin/gcc-9
+ export HOST_CXX_COMPILER=/usr/bin/g++-9
+ export TF_CUDA_CLANG=0 # Clang currently disabled because it's not compatible at the moment.
+ export CLANG_CUDA_COMPILER_PATH=/usr/bin/clang
+ export TF_CUDA_PATHS=/opt/cuda,/usr/lib,/usr
+ export TF_CUDA_VERSION=$(/opt/cuda/bin/nvcc --version | sed -n 's/^.*release \(.*\),.*/\1/p') # text between "release " and the following comma in nvcc's output
+ export TF_CUDNN_VERSION=$(sed -n 's/^#define CUDNN_MAJOR\s*\(.*\).*/\1/p' /usr/include/cudnn_version.h) # value of the CUDNN_MAJOR define
+ if [[ -z "$COMPUTE_CAPABILITIES" ]]; then # default list applies only when the caller did not set COMPUTE_CAPABILITIES
+ COMPUTE_CAPABILITIES=5.2,5.3,6.0,6.1,6.2,7.0,7.2,7.5,8.0,8.6
+ else
+ echo -e "\e[1mCUDA compute capabilites overriden by env: \e[0m${COMPUTE_CAPABILITIES}"
+ fi
+ export TF_CUDA_COMPUTE_CAPABILITIES=$COMPUTE_CAPABILITIES
+
+ # Required until https://github.com/tensorflow/tensorflow/issues/39467 is fixed.
+ export CC=gcc-9
+ export CXX=g++-9
+ # BAZEL_ARGS is a single string; build() expands it unquoted so it word-splits into separate bazel options
+ export BAZEL_ARGS="--config=mkl -c opt --copt=-I/usr/include/openssl-1.0 --host_copt=-I/usr/include/openssl-1.0 --linkopt=-l:libssl.so.1.0.0 --linkopt=-l:libcrypto.so.1.0.0 --host_linkopt=-l:libssl.so.1.0.0 --host_linkopt=-l:libcrypto.so.1.0.0"
+}
+
+build() {  # Builds the CPU-only tree, then the CUDA tree; each run ends by writing a wheel under $srcdir
+  echo "Building without cuda and with native optimizations"
+  cd "${srcdir}"/tensorflow-${_pkgver}-opt-native
+  export CC_OPT_FLAGS="$CFLAGS -O3 -funsafe-math-optimizations"
+
+  _copts=()  # one --copt=<flag> per compiler flag; reused as-is by the CUDA build below
+  for copt in $CC_OPT_FLAGS; do  # deliberately unquoted: split the flag string into words
+    _copts+=(--copt="$copt")
+  done
+  # Log the complete flag list (a bare ${_copts} would expand to the first element only).
+  echo "${_copts[@]}"
+
+  export TF_NEED_CUDA=0
+  ./configure
+  # BAZEL_ARGS is a plain string, so it must stay unquoted to be word-split into options.
+  bazel \
+    build --config=avx2_linux \
+      ${BAZEL_ARGS[@]} \
+      "${_copts[@]}" \
+      //tensorflow:libtensorflow.so \
+      //tensorflow:libtensorflow_cc.so \
+      //tensorflow:install_headers \
+      //tensorflow/tools/pip_package:build_pip_package
+  bazel-bin/tensorflow/tools/pip_package/build_pip_package "${srcdir}"/tmpopt
+
+  echo "Building with cuda and with non-x86-64 optimizations"
+  cd "${srcdir}"/tensorflow-${_pkgver}-opt-native-cuda
+  export CC_OPT_FLAGS="$CFLAGS -O3 -funsafe-math-optimizations"
+  export TF_NEED_CUDA=1
+  export TF_CUDA_CLANG=0
+  ./configure
+  bazel \
+    build --config=avx2_linux \
+      ${BAZEL_ARGS[@]} \
+      "${_copts[@]}" \
+      //tensorflow:libtensorflow.so \
+      //tensorflow:libtensorflow_cc.so \
+      //tensorflow:install_headers \
+      //tensorflow/tools/pip_package:build_pip_package
+  bazel-bin/tensorflow/tools/pip_package/build_pip_package --gpu "${srcdir}"/tmpoptcuda
+}
+
+_package() { # $1: directory name under $srcdir that holds the wheel written by build() (callers pass tmpopt / tmpoptcuda)
+ # install headers first
+ install -d "${pkgdir}"/usr/include/tensorflow
+ cp -r bazel-bin/tensorflow/include/* "${pkgdir}"/usr/include/tensorflow/
+ # install python-version to get all extra headers
+ WHEEL_PACKAGE=$(find "${srcdir}"/$1 -name "tensor*.whl")
+ pip install --ignore-installed --upgrade --root "${pkgdir}"/ $WHEEL_PACKAGE --no-dependencies
+ # move extra headers to correct location
+ local _srch_path="${pkgdir}/usr/lib/python$(get_pyver)"/site-packages/tensorflow/include
+ check_dir "${_srch_path}" # we need to quit on broken search paths
+ find "${_srch_path}" -maxdepth 1 -mindepth 1 -type d -print0 | while read -rd $'\0' _folder; do
+ cp -nr "${_folder}" "${pkgdir}"/usr/include/tensorflow/ # -n: do not overwrite headers already copied from bazel-bin
+ done
+ # clean up unneeded files (the pip install above was only needed for its headers; usr/include is kept)
+ rm -rf "${pkgdir}"/usr/bin
+ rm -rf "${pkgdir}"/usr/lib
+ rm -rf "${pkgdir}"/usr/share
+
+ # install the rest of tensorflow: pkg-config files, shared libraries with .so -> .so.<first char of pkgver> -> .so.<pkgver> symlink chains, C API header, license
+ tensorflow/c/generate-pc.sh --prefix=/usr --version=${pkgver}
+ sed -e 's@/include$@/include/tensorflow@' -i tensorflow.pc -i tensorflow_cc.pc
+ install -Dm644 tensorflow.pc "${pkgdir}"/usr/lib/pkgconfig/tensorflow.pc
+ install -Dm644 tensorflow_cc.pc "${pkgdir}"/usr/lib/pkgconfig/tensorflow_cc.pc
+ install -Dm755 bazel-bin/tensorflow/libtensorflow.so "${pkgdir}"/usr/lib/libtensorflow.so.${pkgver}
+ ln -s libtensorflow.so.${pkgver} "${pkgdir}"/usr/lib/libtensorflow.so.${pkgver:0:1}
+ ln -s libtensorflow.so.${pkgver:0:1} "${pkgdir}"/usr/lib/libtensorflow.so
+ install -Dm755 bazel-bin/tensorflow/libtensorflow_cc.so "${pkgdir}"/usr/lib/libtensorflow_cc.so.${pkgver}
+ ln -s libtensorflow_cc.so.${pkgver} "${pkgdir}"/usr/lib/libtensorflow_cc.so.${pkgver:0:1}
+ ln -s libtensorflow_cc.so.${pkgver:0:1} "${pkgdir}"/usr/lib/libtensorflow_cc.so
+ install -Dm755 bazel-bin/tensorflow/libtensorflow_framework.so "${pkgdir}"/usr/lib/libtensorflow_framework.so.${pkgver}
+ ln -s libtensorflow_framework.so.${pkgver} "${pkgdir}"/usr/lib/libtensorflow_framework.so.${pkgver:0:1}
+ ln -s libtensorflow_framework.so.${pkgver:0:1} "${pkgdir}"/usr/lib/libtensorflow_framework.so
+ install -Dm644 tensorflow/c/c_api.h "${pkgdir}"/usr/include/tensorflow/tensorflow/c/c_api.h
+ install -Dm644 LICENSE "${pkgdir}"/usr/share/licenses/${pkgname}/LICENSE
+}
+
+_python_package() { # $1: directory name under $srcdir that holds the wheel written by build() (callers pass tmpopt / tmpoptcuda)
+ WHEEL_PACKAGE=$(find "${srcdir}"/$1 -name "tensor*.whl")
+ pip install --ignore-installed --upgrade --root "${pkgdir}"/ $WHEEL_PACKAGE --no-dependencies
+
+ # create symlinks to headers: each top-level include directory is replaced by a link into /usr/include/tensorflow
+ local _srch_path="${pkgdir}/usr/lib/python$(get_pyver)"/site-packages/tensorflow/include/
+ check_dir "${_srch_path}" # we need to quit on broken search paths
+ find "${_srch_path}" -maxdepth 1 -mindepth 1 -type d -print0 | while read -rd $'\0' _folder; do
+ rm -rf "${_folder}"
+ _smlink="$(basename "${_folder}")"
+ ln -s /usr/include/tensorflow/"${_smlink}" "${_srch_path}" # target is a directory, so the link is created inside it under the same name
+ done
+
+ # tensorboard has been separated from upstream but they still install it with
+ # tensorflow. I don't know what kind of sense that makes but we have to clean
+ # it out from this package.
+ rm -rf "${pkgdir}"/usr/bin/tensorboard
+
+ install -Dm644 LICENSE "${pkgdir}"/usr/share/licenses/${pkgname}/LICENSE
+}
+
+package_tensorflow-opt-native() {  # library/header package produced from the CPU-only tree
+  pkgdesc="Library for computation using data flow graphs for scalable machine learning (with CPU optimizations)"
+  provides=(tensorflow)
+  conflicts=(tensorflow)
+
+  cd "${srcdir}/tensorflow-${_pkgver}-opt-native"
+  _package tmpopt  # wheel directory written by build()
+}
+
+package_tensorflow-opt-native-cuda() {  # library/header package produced from the CUDA tree
+  pkgdesc="Library for computation using data flow graphs for scalable machine learning (with CUDA and CPU optimizations)"
+  depends+=(cuda cudnn nccl)
+  provides=(tensorflow tensorflow-cuda)
+  conflicts=(tensorflow)
+
+  cd "${srcdir}/tensorflow-${_pkgver}-opt-native-cuda"
+  _package tmpoptcuda  # wheel directory written by build()
+}
+
+package_python-tensorflow-opt-native() {  # Python package for the CPU-only build; pairs with tensorflow-opt-native from this PKGBUILD
+  pkgdesc="Library for computation using data flow graphs for scalable machine learning (with CPU optimizations)"
+  depends+=(tensorflow-opt-native python-termcolor python-astor python-gast python-numpy python-protobuf absl-py python-h5py python-keras-applications python-keras-preprocessing python-tensorflow-estimator python-opt_einsum python-astunparse python-pasta)
+  conflicts=(python-tensorflow)
+  provides=(python-tensorflow)
+
+  cd "${srcdir}"/tensorflow-${_pkgver}-opt-native
+  _python_package tmpopt  # wheel directory written by build()
+}
+
+package_python-tensorflow-opt-native-cuda() {  # Python package for the CUDA build; pairs with tensorflow-opt-native-cuda
+  pkgdesc="Library for computation using data flow graphs for scalable machine learning (with CUDA and CPU optimizations)"
+  depends+=(tensorflow-opt-native-cuda python-termcolor python-astor python-gast python-numpy cuda cudnn python-pycuda python-protobuf absl-py nccl python-h5py python-keras-applications python-keras-preprocessing python-tensorflow-estimator python-opt_einsum python-astunparse python-pasta)
+  provides=(python-tensorflow python-tensorflow-cuda)
+  conflicts=(python-tensorflow)
+
+  cd "${srcdir}/tensorflow-${_pkgver}-opt-native-cuda"
+  _python_package tmpoptcuda  # wheel directory written by build()
+}
+
+# vim:set ts=2 sw=2 et:
diff --git a/build-against-actual-mkl.patch b/build-against-actual-mkl.patch
new file mode 100644
index 000000000000..c1589b568751
--- /dev/null
+++ b/build-against-actual-mkl.patch
@@ -0,0 +1,37 @@
+diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl
+index 4b8fb83eb0..f4e1adfb22 100644
+--- a/third_party/mkl/build_defs.bzl
++++ b/third_party/mkl/build_defs.bzl
+@@ -124,7 +124,7 @@ def _mkl_autoconf_impl(repository_ctx):
+ if _enable_local_mkl(repository_ctx):
+ # Symlink lib and include local folders.
+ mkl_root = repository_ctx.os.environ[_TF_MKL_ROOT]
+- mkl_lib_path = "%s/lib" % mkl_root
++ mkl_lib_path = "%s/lib/intel64" % mkl_root
+ repository_ctx.symlink(mkl_lib_path, "lib")
+ mkl_include_path = "%s/include" % mkl_root
+ repository_ctx.symlink(mkl_include_path, "include")
+diff --git a/third_party/mkl/mkl.BUILD b/third_party/mkl/mkl.BUILD
+index 72370182c4..4972bb005e 100644
+--- a/third_party/mkl/mkl.BUILD
++++ b/third_party/mkl/mkl.BUILD
+@@ -5,7 +5,6 @@ exports_files(["license.txt"])
+ filegroup(
+ name = "LICENSE",
+ srcs = [
+- "license.txt",
+ ],
+ visibility = ["//visibility:public"],
+ )
+@@ -21,7 +20,10 @@ cc_library(
+ name = "mkl_libs_linux",
+ srcs = [
+ "lib/libiomp5.so",
+- "lib/libmklml_intel.so",
++ "lib/libmkl_core.so",
++ "lib/libmkl_rt.so",
++ "lib/libmkl_intel_thread.so",
++ "lib/libmkl_intel_lp64.so",
+ ],
+ visibility = ["//visibility:public"],
+ )
diff --git a/c6769e20bf6096d5828e2590def2b25edb3189d6.patch b/c6769e20bf6096d5828e2590def2b25edb3189d6.patch
new file mode 100644
index 000000000000..73e95908867f
--- /dev/null
+++ b/c6769e20bf6096d5828e2590def2b25edb3189d6.patch
@@ -0,0 +1,251 @@
+From c6769e20bf6096d5828e2590def2b25edb3189d6 Mon Sep 17 00:00:00 2001
+From: Christian Sigg <csigg@google.com>
+Date: Mon, 17 Aug 2020 14:12:02 -0700
+Subject: [PATCH] Use CUB from the CUDA Toolkit starting with version 11.0.
+
+PiperOrigin-RevId: 327096097
+Change-Id: I444ec3ac3348f76728c931a4bb4aa1b7cbe1b673
+---
+ tensorflow/core/kernels/BUILD | 8 ++---
+ tensorflow/core/kernels/gpu_prim.h | 26 +++++++-------
+ tensorflow/core/util/BUILD | 2 +-
+ third_party/cub.BUILD | 1 -
+ third_party/cub.pr170.patch | 48 -------------------------
+ third_party/gpus/cuda/BUILD.tpl | 6 ++++
+ third_party/gpus/cuda/BUILD.windows.tpl | 5 +++
+ third_party/gpus/cuda_configure.bzl | 7 ++++
+ 8 files changed, 36 insertions(+), 67 deletions(-)
+ delete mode 100644 third_party/cub.pr170.patch
+
+diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
+index 88958cdaa9878..19dc5c73252a8 100644
+--- a/tensorflow/core/kernels/BUILD
++++ b/tensorflow/core/kernels/BUILD
+@@ -490,7 +490,7 @@ cc_library(
+ name = "gpu_prim_hdrs",
+ hdrs = ["gpu_prim.h"],
+ deps = if_cuda([
+- "@cub_archive//:cub",
++ "@local_config_cuda//cuda:cub_headers",
+ ]) + if_rocm([
+ "@local_config_rocm//rocm:rocprim",
+ ]),
+@@ -3896,7 +3896,7 @@ tf_kernel_library(
+ "//tensorflow/core:framework",
+ "//tensorflow/core:lib",
+ ] + if_cuda([
+- "@cub_archive//:cub",
++ "@local_config_cuda//cuda:cub_headers",
+ "@local_config_cuda//cuda:cudnn_header",
+ ]) + if_rocm([
+ "@local_config_rocm//rocm:rocprim",
+@@ -3986,7 +3986,7 @@ tf_kernel_library(
+ ] + if_cuda_or_rocm([
+ ":reduction_ops",
+ ]) + if_cuda([
+- "@cub_archive//:cub",
++ "@local_config_cuda//cuda:cub_headers",
+ "//tensorflow/core:stream_executor",
+ "//tensorflow/stream_executor/cuda:cuda_stream",
+ ]) + if_rocm([
+@@ -4708,7 +4708,7 @@ tf_kernel_library(
+ ] + if_cuda_or_rocm([
+ ":reduction_ops",
+ ]) + if_cuda([
+- "@cub_archive//:cub",
++ "@local_config_cuda//cuda:cub_headers",
+ ]) + if_rocm([
+ "@local_config_rocm//rocm:rocprim",
+ ]),
+diff --git a/tensorflow/core/kernels/gpu_prim.h b/tensorflow/core/kernels/gpu_prim.h
+index 82fcb21e0ac04..33c5df1ae2371 100644
+--- a/tensorflow/core/kernels/gpu_prim.h
++++ b/tensorflow/core/kernels/gpu_prim.h
+@@ -15,19 +15,19 @@ limitations under the license, the license you must see.
+ #define TENSORFLOW_CORE_KERNELS_GPU_PRIM_H_
+
+ #if GOOGLE_CUDA
+-#include "third_party/cub/block/block_load.cuh"
+-#include "third_party/cub/block/block_scan.cuh"
+-#include "third_party/cub/block/block_store.cuh"
+-#include "third_party/cub/device/device_histogram.cuh"
+-#include "third_party/cub/device/device_radix_sort.cuh"
+-#include "third_party/cub/device/device_reduce.cuh"
+-#include "third_party/cub/device/device_segmented_radix_sort.cuh"
+-#include "third_party/cub/device/device_segmented_reduce.cuh"
+-#include "third_party/cub/device/device_select.cuh"
+-#include "third_party/cub/iterator/counting_input_iterator.cuh"
+-#include "third_party/cub/iterator/transform_input_iterator.cuh"
+-#include "third_party/cub/thread/thread_operators.cuh"
+-#include "third_party/cub/warp/warp_reduce.cuh"
++#include "cub/block/block_load.cuh"
++#include "cub/block/block_scan.cuh"
++#include "cub/block/block_store.cuh"
++#include "cub/device/device_histogram.cuh"
++#include "cub/device/device_radix_sort.cuh"
++#include "cub/device/device_reduce.cuh"
++#include "cub/device/device_segmented_radix_sort.cuh"
++#include "cub/device/device_segmented_reduce.cuh"
++#include "cub/device/device_select.cuh"
++#include "cub/iterator/counting_input_iterator.cuh"
++#include "cub/iterator/transform_input_iterator.cuh"
++#include "cub/thread/thread_operators.cuh"
++#include "cub/warp/warp_reduce.cuh"
+ #include "third_party/gpus/cuda/include/cusparse.h"
+
+ namespace gpuprim = ::cub;
+diff --git a/tensorflow/core/util/BUILD b/tensorflow/core/util/BUILD
+index 4d2ff9a805811..241e382a650ba 100644
+--- a/tensorflow/core/util/BUILD
++++ b/tensorflow/core/util/BUILD
+@@ -626,7 +626,7 @@ tf_kernel_library(
+ "//tensorflow/core:lib",
+ ] + if_cuda([
+ "//tensorflow/stream_executor/cuda:cusparse_lib",
+- "@cub_archive//:cub",
++ "@local_config_cuda//cuda:cub_headers",
+ ]) + if_rocm([
+ "@local_config_rocm//rocm:hipsparse",
+ ]),
+diff --git a/third_party/cub.BUILD b/third_party/cub.BUILD
+index a04347b21eefb..29159c9dad3d3 100644
+--- a/third_party/cub.BUILD
++++ b/third_party/cub.BUILD
+@@ -20,7 +20,6 @@ filegroup(
+ cc_library(
+ name = "cub",
+ hdrs = if_cuda([":cub_header_files"]),
+- include_prefix = "third_party",
+ deps = [
+ "@local_config_cuda//cuda:cuda_headers",
+ ],
+diff --git a/third_party/cub.pr170.patch b/third_party/cub.pr170.patch
+deleted file mode 100644
+index 5b7432e885867..0000000000000
+--- a/third_party/cub.pr170.patch
++++ /dev/null
+@@ -1,48 +0,0 @@
+-From fd6e7a61a16a17fa155cbd717de0c79001af71e6 Mon Sep 17 00:00:00 2001
+-From: Artem Belevich <tra@google.com>
+-Date: Mon, 23 Sep 2019 11:18:56 -0700
+-Subject: [PATCH] Fix CUDA version detection in CUB
+-
+-This fixes the problem with CUB using deprecated shfl/vote instructions when CUB
+-is compiled with clang (e.g. some TensorFlow builds).
+----
+- cub/util_arch.cuh | 3 ++-
+- cub/util_type.cuh | 4 ++--
+- 2 files changed, 4 insertions(+), 3 deletions(-)
+-
+-diff --git a/cub/util_arch.cuh b/cub/util_arch.cuh
+-index 87c5ea2fb..9ad9d1cbb 100644
+---- a/cub/util_arch.cuh
+-+++ b/cub/util_arch.cuh
+-@@ -44,7 +44,8 @@ namespace cub {
+-
+- #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
+-
+--#if (__CUDACC_VER_MAJOR__ >= 9) && !defined(CUB_USE_COOPERATIVE_GROUPS)
+-+#if !defined(CUB_USE_COOPERATIVE_GROUPS) && \
+-+ (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
+- #define CUB_USE_COOPERATIVE_GROUPS
+- #endif
+-
+-diff --git a/cub/util_type.cuh b/cub/util_type.cuh
+-index 0ba41e1ed..b2433d735 100644
+---- a/cub/util_type.cuh
+-+++ b/cub/util_type.cuh
+-@@ -37,7 +37,7 @@
+- #include <limits>
+- #include <cfloat>
+-
+--#if (__CUDACC_VER_MAJOR__ >= 9)
+-+#if (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
+- #include <cuda_fp16.h>
+- #endif
+-
+-@@ -1063,7 +1063,7 @@ struct FpLimits<double>
+- };
+-
+-
+--#if (__CUDACC_VER_MAJOR__ >= 9)
+-+#if (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
+- template <>
+- struct FpLimits<__half>
+- {
+diff --git a/third_party/gpus/cuda/BUILD.tpl b/third_party/gpus/cuda/BUILD.tpl
+index e5833e7cdbbc2..a4a21abc36769 100644
+--- a/third_party/gpus/cuda/BUILD.tpl
++++ b/third_party/gpus/cuda/BUILD.tpl
+@@ -176,6 +176,11 @@ cc_library(
+ ],
+ )
+
++alias(
++ name = "cub_headers",
++ actual = "%{cub_actual}"
++)
++
+ cuda_header_library(
+ name = "cupti_headers",
+ hdrs = [":cuda-extras"],
+@@ -224,3 +229,4 @@ py_library(
+ )
+
+ %{copy_rules}
++
+diff --git a/third_party/gpus/cuda/BUILD.windows.tpl b/third_party/gpus/cuda/BUILD.windows.tpl
+index 55a9ec3d1ab10..cabfac28fc357 100644
+--- a/third_party/gpus/cuda/BUILD.windows.tpl
++++ b/third_party/gpus/cuda/BUILD.windows.tpl
+@@ -171,6 +171,11 @@ cc_library(
+ ],
+ )
+
++alias(
++ name = "cub_headers",
++ actual = "%{cub_actual}"
++)
++
+ cuda_header_library(
+ name = "cupti_headers",
+ hdrs = [":cuda-extras"],
+diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
+index 70bb91159de1a..ea33963fe19fb 100644
+--- a/third_party/gpus/cuda_configure.bzl
++++ b/third_party/gpus/cuda_configure.bzl
+@@ -692,6 +692,7 @@ def _get_cuda_config(repository_ctx, find_cuda_config_script):
+ return struct(
+ cuda_toolkit_path = toolkit_path,
+ cuda_version = cuda_version,
++ cuda_version_major = cuda_major,
+ cublas_version = cublas_version,
+ cusolver_version = cusolver_version,
+ curand_version = curand_version,
+@@ -776,6 +777,7 @@ def _create_dummy_repository(repository_ctx):
+ "%{curand_lib}": lib_name("curand", cpu_value),
+ "%{cupti_lib}": lib_name("cupti", cpu_value),
+ "%{cusparse_lib}": lib_name("cusparse", cpu_value),
++ "%{cub_actual}": ":cuda_headers",
+ "%{copy_rules}": """
+ filegroup(name="cuda-include")
+ filegroup(name="cublas-include")
+@@ -1122,6 +1124,10 @@ def _create_local_cuda_repository(repository_ctx):
+ },
+ )
+
++ cub_actual = "@cub_archive//:cub"
++ if int(cuda_config.cuda_version_major) >= 11:
++ cub_actual = ":cuda_headers"
++
+ repository_ctx.template(
+ "cuda/BUILD",
+ tpl_paths["cuda:BUILD"],
+@@ -1137,6 +1143,7 @@ def _create_local_cuda_repository(repository_ctx):
+ "%{curand_lib}": _basename(repository_ctx, cuda_libs["curand"]),
+ "%{cupti_lib}": _basename(repository_ctx, cuda_libs["cupti"]),
+ "%{cusparse_lib}": _basename(repository_ctx, cuda_libs["cusparse"]),
++ "%{cub_actual}": cub_actual,
+ "%{copy_rules}": "\n".join(copy_rules),
+ },
+ )
diff --git a/cuda11.1.patch b/cuda11.1.patch
new file mode 100644
index 000000000000..e4e9dd769a36
--- /dev/null
+++ b/cuda11.1.patch
@@ -0,0 +1,136 @@
+From 4a64bbe4ff9fb03a948ee76f7349cfdb9e9b7528 Mon Sep 17 00:00:00 2001
+From: Nathan Luehr <nluehr@nvidia.com>
+Date: Thu, 13 Aug 2020 09:46:43 -0700
+Subject: [PATCH 1/2] Fix cudart 11.1 soname
+
+---
+ third_party/gpus/cuda_configure.bzl | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
+index ea33963fe19fb..3e6bdc9d8eb22 100644
+--- a/third_party/gpus/cuda_configure.bzl
++++ b/third_party/gpus/cuda_configure.bzl
+@@ -534,14 +534,14 @@ def _find_libs(repository_ctx, check_cuda_libs_script, cuda_config):
+ "cudart",
+ cpu_value,
+ cuda_config.config["cuda_library_dir"],
+- cuda_config.cuda_version,
++ cuda_config.cudart_version,
+ static = False,
+ ),
+ "cudart_static": _check_cuda_lib_params(
+ "cudart_static",
+ cpu_value,
+ cuda_config.config["cuda_library_dir"],
+- cuda_config.cuda_version,
++ cuda_config.cudart_version,
+ static = True,
+ ),
+ "cublas": _check_cuda_lib_params(
+@@ -651,6 +651,7 @@ def _get_cuda_config(repository_ctx, find_cuda_config_script):
+ cuda_toolkit_path: The CUDA toolkit installation directory.
+ cudnn_install_basedir: The cuDNN installation directory.
+ cuda_version: The version of CUDA on the system.
++ cudart_version: The CUDA runtime version on the system.
+ cudnn_version: The version of cuDNN on the system.
+ compute_capabilities: A list of the system's CUDA compute capabilities.
+ cpu_value: The name of the host operating system.
+@@ -668,6 +669,10 @@ def _get_cuda_config(repository_ctx, find_cuda_config_script):
+ cudnn_version = ("64_%s" if is_windows else "%s") % config["cudnn_version"]
+
+ if int(cuda_major) >= 11:
++ if int(cuda_major) == 11:
++ cudart_version = "64_110" if is_windows else "11.0"
++ else:
++ cudart_version = ("64_%s" if is_windows else "%s") % cuda_major
+ cublas_version = ("64_%s" if is_windows else "%s") % config["cublas_version"].split(".")[0]
+ cusolver_version = ("64_%s" if is_windows else "%s") % config["cusolver_version"].split(".")[0]
+ curand_version = ("64_%s" if is_windows else "%s") % config["curand_version"].split(".")[0]
+@@ -677,12 +682,14 @@ def _get_cuda_config(repository_ctx, find_cuda_config_script):
+ # cuda_lib_version is for libraries like cuBLAS, cuFFT, cuSOLVER, etc.
+ # It changed from 'x.y' to just 'x' in CUDA 10.1.
+ cuda_lib_version = ("64_%s" if is_windows else "%s") % cuda_major
++ cudart_version = cuda_version
+ cublas_version = cuda_lib_version
+ cusolver_version = cuda_lib_version
+ curand_version = cuda_lib_version
+ cufft_version = cuda_lib_version
+ cusparse_version = cuda_lib_version
+ else:
++ cudart_version = cuda_version
+ cublas_version = cuda_version
+ cusolver_version = cuda_version
+ curand_version = cuda_version
+@@ -693,6 +700,7 @@ def _get_cuda_config(repository_ctx, find_cuda_config_script):
+ cuda_toolkit_path = toolkit_path,
+ cuda_version = cuda_version,
++ cudart_version = cudart_version,
+ cublas_version = cublas_version,
+ cusolver_version = cusolver_version,
+ curand_version = curand_version,
+
+From 2642e93e6cbb7a3a1e916abf1ab8e18fa2735237 Mon Sep 17 00:00:00 2001
+From: Nathan Luehr <nluehr@nvidia.com>
+Date: Fri, 14 Aug 2020 13:21:58 -0700
+Subject: [PATCH 2/2] Use correct cudart soname in GetDsoHandle
+
+---
+ tensorflow/stream_executor/platform/default/dso_loader.cc | 3 ++-
+ third_party/gpus/cuda/cuda_config.h.tpl | 1 +
+ third_party/gpus/cuda_configure.bzl | 2 ++
+ 3 files changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/tensorflow/stream_executor/platform/default/dso_loader.cc b/tensorflow/stream_executor/platform/default/dso_loader.cc
+index 84293b7767a20..a78c738f32c2a 100644
+--- a/tensorflow/stream_executor/platform/default/dso_loader.cc
++++ b/tensorflow/stream_executor/platform/default/dso_loader.cc
+@@ -31,6 +31,7 @@ namespace internal {
+
+ namespace {
+ string GetCudaVersion() { return TF_CUDA_VERSION; }
++string GetCudaRtVersion() { return TF_CUDART_VERSION; }
+ string GetCudnnVersion() { return TF_CUDNN_VERSION; }
+ string GetCublasVersion() { return TF_CUBLAS_VERSION; }
+ string GetCusolverVersion() { return TF_CUSOLVER_VERSION; }
+@@ -77,7 +78,7 @@ port::StatusOr<void*> GetCudaDriverDsoHandle() {
+ }
+
+ port::StatusOr<void*> GetCudaRuntimeDsoHandle() {
+- return GetDsoHandle("cudart", GetCudaVersion());
++ return GetDsoHandle("cudart", GetCudaRtVersion());
+ }
+
+ port::StatusOr<void*> GetCublasDsoHandle() {
+diff --git a/third_party/gpus/cuda/cuda_config.h.tpl b/third_party/gpus/cuda/cuda_config.h.tpl
+index b59889938b1a9..ab26686ccb8b2 100644
+--- a/third_party/gpus/cuda/cuda_config.h.tpl
++++ b/third_party/gpus/cuda/cuda_config.h.tpl
+@@ -17,6 +17,7 @@ limitations under the License.
+ #define CUDA_CUDA_CONFIG_H_
+
+ #define TF_CUDA_VERSION "%{cuda_version}"
++#define TF_CUDART_VERSION "%{cudart_version}"
+ #define TF_CUBLAS_VERSION "%{cublas_version}"
+ #define TF_CUSOLVER_VERSION "%{cusolver_version}"
+ #define TF_CURAND_VERSION "%{curand_version}"
+diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
+index 3e6bdc9d8eb22..f85a53b1593b4 100644
+--- a/third_party/gpus/cuda_configure.bzl
++++ b/third_party/gpus/cuda_configure.bzl
+@@ -824,6 +824,7 @@ filegroup(name="cudnn-include")
+ "cuda:cuda_config.h",
+ {
+ "%{cuda_version}": "",
++ "%{cudart_version}": "",
+ "%{cublas_version}": "",
+ "%{cusolver_version}": "",
+ "%{curand_version}": "",
+@@ -1289,6 +1290,7 @@ def _create_local_cuda_repository(repository_ctx):
+ tpl_paths["cuda:cuda_config.h"],
+ {
+ "%{cuda_version}": cuda_config.cuda_version,
++ "%{cudart_version}": cuda_config.cudart_version,
+ "%{cublas_version}": cuda_config.cublas_version,
+ "%{cusolver_version}": cuda_config.cusolver_version,
+ "%{curand_version}": cuda_config.curand_version,
diff --git a/fix-h5py3.0.patch b/fix-h5py3.0.patch
new file mode 100644
index 000000000000..18e55a5297a4
--- /dev/null
+++ b/fix-h5py3.0.patch
@@ -0,0 +1,18 @@
+diff --git a/tensorflow/python/keras/saving/hdf5_format.py b/tensorflow/python/keras/saving/hdf5_format.py
+index d3bb10c98d..e89f5356bb 100644
+--- a/tensorflow/python/keras/saving/hdf5_format.py
++++ b/tensorflow/python/keras/saving/hdf5_format.py
+@@ -659,11 +659,11 @@ def load_weights_from_hdf5_group(f, layers):
+ and weights file.
+ """
+ if 'keras_version' in f.attrs:
+- original_keras_version = f.attrs['keras_version'].decode('utf8')
++ original_keras_version = f.attrs['keras_version']
+ else:
+ original_keras_version = '1'
+ if 'backend' in f.attrs:
+- original_backend = f.attrs['backend'].decode('utf8')
++ original_backend = f.attrs['backend']
+ else:
+ original_backend = None
+
diff --git a/numpy1.20.patch b/numpy1.20.patch
new file mode 100644
index 000000000000..5198d2d7997a
--- /dev/null
+++ b/numpy1.20.patch
@@ -0,0 +1,45 @@
+From 75ea0b31477d6ba9e990e296bbbd8ca4e7eebadf Mon Sep 17 00:00:00 2001
+From: Christian Sigg <csigg@google.com>
+Date: Fri, 26 Jun 2020 05:08:10 -0700
+Subject: [PATCH] Provide overload to cope with const-ness change of NumPy's
+ PyUFuncGenericFunction.
+
+See https://github.com/tensorflow/tensorflow/issues/40688, https://github.com/tensorflow/tensorflow/pull/40654.
+
+PiperOrigin-RevId: 318452381
+Change-Id: Icc5152f2b020ef19882a49e3c86ac80bbe048d64
+---
+ tensorflow/python/lib/core/bfloat16.cc | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/tensorflow/python/lib/core/bfloat16.cc b/tensorflow/python/lib/core/bfloat16.cc
+index feb01f11a1af2..bb6b720febe59 100644
+--- a/tensorflow/python/lib/core/bfloat16.cc
++++ b/tensorflow/python/lib/core/bfloat16.cc
+@@ -517,7 +517,7 @@ bool RegisterBfloat16Cast(int numpy_type, bool cast_is_safe) {
+ }
+
+ template <typename InType, typename OutType, typename Functor>
+-void BinaryUFunc(char** args, npy_intp* dimensions, npy_intp* steps,
++void BinaryUFunc(char** args, const npy_intp* dimensions, const npy_intp* steps,
+ void* data) {
+ const char* i0 = args[0];
+ const char* i1 = args[1];
+@@ -532,11 +532,17 @@ void BinaryUFunc(char** args, npy_intp* dimensions, npy_intp* steps,
+ }
+ }
+
++// Numpy changed const-ness of PyUFuncGenericFunction, provide overload.
+ template <typename Functor>
+ void CompareUFunc(char** args, npy_intp* dimensions, npy_intp* steps,
+ void* data) {
+ BinaryUFunc<bfloat16, npy_bool, Functor>(args, dimensions, steps, data);
+ }
++template <typename Functor>
++void CompareUFunc(char** args, const npy_intp* dimensions,
++ const npy_intp* steps, void* data) {
++ BinaryUFunc<bfloat16, npy_bool, Functor>(args, dimensions, steps, data);
++}
+
+ struct Bfloat16EqFunctor {
+ npy_bool operator()(bfloat16 a, bfloat16 b) { return a == b; }