summarylogtreecommitdiffstats
diff options
context:
space:
mode:
author: Chih-Hsuan Yen, 2021-12-31 14:06:49 +0800
committer: Chih-Hsuan Yen, 2021-12-31 14:07:10 +0800
commit: 0185531906bda3a9aba93bbb0f3dcfeb0ae671ad (patch)
tree: c5101895f73be2e196ec65113b8dde92f72b256d
parent: a84b5454e84aed96aa78cc311c407d496b3855d6 (diff)
download: aur-0185531906bda3a9aba93bbb0f3dcfeb0ae671ad.tar.gz
various fixes/improvements
* Switch back from clang to gcc. Apparently upstream tests more on gcc than on clang, and there are several compatibility issues between onnxruntime and clang [1,2] as well as between cuda and clang [3]. On the other hand, the internal compiler errors from gcc have been fixed.
* Add more optional dependencies for several sub-packages, as motivated by [4].
* Fix missing orttraining Python files, which I discovered while checking optional dependencies.
* Don't hard-code usage of GNU make, as suggested in [4].

[1] https://github.com/microsoft/onnxruntime/pull/10014
[2] https://github.com/microsoft/onnxruntime/pull/10160
[3] https://forums.developer.nvidia.com/t/building-with-clang-cuda-11-3-0-works-but-with-cuda-11-3-1-fails-regression/182176
[4] https://aur.archlinux.org/packages/python-onnxruntime/#comment-843401
-rw-r--r--  .SRCINFO  18
-rw-r--r--  PKGBUILD  63
-rw-r--r--  clang.patch  22
-rw-r--r--  install-orttraining-files.diff  19
4 files changed, 56 insertions, 66 deletions
diff --git a/.SRCINFO b/.SRCINFO
index 35a78a664bcb..bcd326eccc15 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -17,7 +17,6 @@ pkgbase = python-onnxruntime
makedepends = cuda
makedepends = cudnn
makedepends = nccl
- makedepends = clang
depends = nsync
depends = re2
depends = python-flatbuffers
@@ -25,7 +24,18 @@ pkgbase = python-onnxruntime
depends = python-protobuf
depends = openmpi
depends = onednn
- optdepends = python-onnx: for the backend API and transformers
+ optdepends = python-onnx: for the backend API, quantization, orttraining, transformers and various tools
+ optdepends = python-coloredlogs: for transformers
+ optdepends = python-psutil: for transformers
+ optdepends = python-py-cpuinfo: for transformers
+ optdepends = python-py3nvml: for transformers
+ optdepends = python-packaging: for transformers and various tools
+ optdepends = python-transformers: for transformers
+ optdepends = python-scipy: for transformers and various tools
+ optdepends = python-pytorch: for transformers, orttraining and various tools
+ optdepends = python-cerberus: for orttraining
+ optdepends = python-h5py: for orttraining
+ optdepends = python-sympy: for transformers and various tools
options = !lto
source = git+https://github.com/microsoft/onnxruntime#tag=v1.10.0
source = git+https://github.com/onnx/onnx.git
@@ -35,7 +45,7 @@ pkgbase = python-onnxruntime
source = git+https://github.com/jarro2783/cxxopts.git
source = pytorch_cpuinfo::git+https://github.com/pytorch/cpuinfo.git
source = build-fixes.patch
- source = clang.patch
+ source = install-orttraining-files.diff
source = system-dnnl.diff
sha512sums = SKIP
sha512sums = SKIP
@@ -45,7 +55,7 @@ pkgbase = python-onnxruntime
sha512sums = SKIP
sha512sums = SKIP
sha512sums = 80ea85ea20bbbdec7991f965a66b627a5f42828bc0c72be0913078d927833a82402fb1af6c5c9f6ecae861b45582fa42c98ce83b02768e4bf875ab89dd1c607c
- sha512sums = ad94af8bb25744b244c4f82e9a06189741f82b295a88523ca0e8005568fac710c2299d783989457e9cf96ef8da0593fb4f70c8792d416f44ab29d6493e204f13
+ sha512sums = 06a002361cc324184d0bfcb520b472f57749c0537329f0e0dee833cc7fce2f08b14590b77bc0211422dfb933dbef6f249f19939f9e0df465c48ee8fc7827e31c
sha512sums = 6735c7aca2ba2f1f2a5286eb064125bf7f2c68a575d572dd157769d15778ff3e717b3a53d696c767748229f23ee6c3a7c82679df1d86283d7c4dd0ec9103ae08
pkgname = python-onnxruntime
diff --git a/PKGBUILD b/PKGBUILD
index 794cf1edd1e0..7536179179e8 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -11,10 +11,21 @@ arch=(x86_64)
url='https://github.com/microsoft/onnxruntime'
license=(MIT)
depends=(nsync re2 python-flatbuffers python-numpy python-protobuf openmpi onednn)
-makedepends=(git cmake pybind11 python-setuptools nlohmann-json chrono-date boost eigen flatbuffers cuda cudnn nccl clang)
+makedepends=(git cmake pybind11 python-setuptools nlohmann-json chrono-date boost eigen flatbuffers cuda cudnn nccl)
optdepends=(
# https://github.com/microsoft/onnxruntime/pull/9969
- 'python-onnx: for the backend API and transformers'
+ 'python-onnx: for the backend API, quantization, orttraining, transformers and various tools'
+ 'python-coloredlogs: for transformers' # also used by TensorRT tools, but we don't build for it, anyway
+ 'python-psutil: for transformers'
+ 'python-py-cpuinfo: for transformers'
+ 'python-py3nvml: for transformers'
+ 'python-packaging: for transformers and various tools'
+ 'python-transformers: for transformers'
+ 'python-scipy: for transformers and various tools'
+ 'python-pytorch: for transformers, orttraining and various tools'
+ 'python-cerberus: for orttraining'
+ 'python-h5py: for orttraining'
+ 'python-sympy: for transformers and various tools'
)
# not de-vendored libraries
# onnx: needs shared libonnx (https://github.com/onnx/onnx/issues/3030)
@@ -26,7 +37,7 @@ source=("git+https://github.com/microsoft/onnxruntime#tag=v$pkgver"
"git+https://github.com/jarro2783/cxxopts.git"
"pytorch_cpuinfo::git+https://github.com/pytorch/cpuinfo.git"
build-fixes.patch
- clang.patch
+ install-orttraining-files.diff
system-dnnl.diff)
sha512sums=('SKIP'
'SKIP'
@@ -36,7 +47,7 @@ sha512sums=('SKIP'
'SKIP'
'SKIP'
'80ea85ea20bbbdec7991f965a66b627a5f42828bc0c72be0913078d927833a82402fb1af6c5c9f6ecae861b45582fa42c98ce83b02768e4bf875ab89dd1c607c'
- 'ad94af8bb25744b244c4f82e9a06189741f82b295a88523ca0e8005568fac710c2299d783989457e9cf96ef8da0593fb4f70c8792d416f44ab29d6493e204f13'
+ '06a002361cc324184d0bfcb520b472f57749c0537329f0e0dee833cc7fce2f08b14590b77bc0211422dfb933dbef6f249f19939f9e0df465c48ee8fc7827e31c'
'6735c7aca2ba2f1f2a5286eb064125bf7f2c68a575d572dd157769d15778ff3e717b3a53d696c767748229f23ee6c3a7c82679df1d86283d7c4dd0ec9103ae08')
# CUDA seems not working with LTO
options+=('!lto')
@@ -48,10 +59,8 @@ prepare() {
cd onnxruntime
patch -Np1 -i ../build-fixes.patch
- patch -Np1 -i ../clang.patch
+ patch -Np1 -i ../install-orttraining-files.diff
patch -Np1 -i ../system-dnnl.diff
- # Fix building DNNL EP with clang https://github.com/microsoft/onnxruntime/pull/10014
- git cherry-pick -n c2d08a877b1f661eb99a29a57fd4184aa0918a80
git submodule init
for mod in onnx SafeInt tensorboard dlpack cxxopts pytorch_cpuinfo; do
@@ -83,36 +92,15 @@ build() {
-Donnxruntime_USE_FULL_PROTOBUF=OFF
)
- # 1. Redefine ___is_signed to ___is_signed to workaround a regression
- # from CUDA 11.3 -> 11.3.1 [1].
- # 2. Enable parallel builds for NVCC via -t0, which spawns multiple
+ # 1. Enable parallel builds for NVCC via -t0, which spawns multiple
# cicc and ptxas processes for each nvcc invocation. The number of
# total processes may be much larger than the number of cores - let
# the scheduler handle it.
- # 3. Work-around the "error: type-id cannot have a name" issue with
- # -DCMAKE_CUDA_STANDARD_REQUIRED=ON, which forces -std= to be
- # specified [2].
- #
- # $ echo "#include <type_traits>" | nvcc -ccbin /usr/bin/clang -x cu -c - -o /dev/null -v --keep
- # /usr/bin/../lib64/gcc/x86_64-pc-linux-gnu/11.1.0/../../../../include/c++/11.1.0/type_traits:591:162: error: type-id cannot have a name
- # template< class _Tp> using __is_signed_integer = __is_one_of< __remove_cv_t< _Tp> , signed char, signed short, signed int, signed long, signed long long, signed __int128_t> ;
- # ^
- # 1 error generated.
- #
- # It is a clang bug exposed by CMake and CUDA. Since CMake 3.22,
- # -std= flag is no longer specified to nvcc (related to
- # CMP0128 [3] ?). On the other hand, when no -std= option is
- # specified, clang defines -D__GLIBCXX_TYPE_INT_N_0=__int128, and
- # cudafe++ somehow replaces __int128 with __int128_t, which does
- # not work with signed/unsigned in clang.
- # [1] https://forums.developer.nvidia.com/t/182176
- # [2] https://cmake.org/cmake/help/latest/prop_tgt/LANG_STANDARD_REQUIRED.html
- # [3] https://cmake.org/cmake/help/latest/policy/CMP0128.html
cmake_args+=(
- -DCMAKE_CUDA_HOST_COMPILER=/usr/bin/clang
- -DCMAKE_CUDA_FLAGS="-D__is_signed=___is_signed -t0"
+ -DCMAKE_CUDA_FLAGS="-t0"
-DCMAKE_CUDA_ARCHITECTURES="$_CUDA_ARCHITECTURES"
-DCMAKE_CUDA_STANDARD_REQUIRED=ON
+ -DCMAKE_CXX_STANDARD_REQUIRED=ON
-Donnxruntime_USE_CUDA=ON
-Donnxruntime_CUDA_HOME=/opt/cuda
-DCMAKE_CUDA_COMPILER:PATH=/opt/cuda/bin/nvcc
@@ -120,22 +108,17 @@ build() {
-Donnxruntime_USE_NCCL=ON
)
- # Use clang as GCC does not work. GCC 11 crashes with internal
- # compiler errors. GCC 10 does not work as some dependent packages
- # (ex: re2) are built with libstdc++ from GCC 11, and thus linking
- # onnxruntime with libstdc++ 10 fails.
- CC=/usr/bin/clang CXX=/usr/bin/clang++ \
- cmake -B build -S cmake "${cmake_args[@]}" "$@"
+ cmake -B build -S cmake "${cmake_args[@]}" "$@"
cd build
- make
+ cmake --build .
python ../setup.py build
}
package_python-onnxruntime() {
cd onnxruntime/build
- make install DESTDIR="$pkgdir"
+ DESTDIR="$pkgdir" cmake --install .
python ../setup.py install --root="$pkgdir" --skip-build --optimize=1
@@ -144,7 +127,7 @@ package_python-onnxruntime() {
for f in LICENSE ThirdPartyNotices.txt ; do
ln -s "$PY_ORT_DIR/$f" "$pkgdir"/usr/share/licenses/$pkgname/$f
done
- # already installed by `make install`, and not useful as this path is not looked up by the linker
+ # already installed by `cmake --install`, and not useful as this path is not looked up by the linker
rm -vf "$pkgdir/$PY_ORT_DIR"/capi/libonnxruntime_providers_*
# installed as split packages
diff --git a/clang.patch b/clang.patch
deleted file mode 100644
index ab6bb90fa315..000000000000
--- a/clang.patch
+++ /dev/null
@@ -1,22 +0,0 @@
-diff --git a/onnxruntime/core/providers/cuda/reduction/reduction_functions.cc b/onnxruntime/core/providers/cuda/reduction/reduction_functions.cc
-index 955df6d9a..f9fd53e15 100644
---- a/onnxruntime/core/providers/cuda/reduction/reduction_functions.cc
-+++ b/onnxruntime/core/providers/cuda/reduction/reduction_functions.cc
-@@ -39,7 +39,7 @@ optional<std::pair<int64_t, int64_t>> GetMinAndMaxContiguousAxes(
- }
-
- // normalize axis values and sort
-- const std::vector<int64_t> axes = [&original_axes, rank]() {
-+ const std::vector<int64_t> axes = [&original_axes, rank]() -> std::vector<int64_t> {
- std::vector<int64_t> result(original_axes);
- std::for_each(
- result.begin(), result.end(),
-@@ -85,7 +85,7 @@ optional<std::pair<int64_t, int64_t>> GetMinAndMaxContiguousAxes(
- return std::distance(dims.begin(), before_min_axis_rit.base());
- }();
-
-- const int64_t max_axis = [&dims, &axes, &is_dim_one]() {
-+ const int64_t max_axis = [&dims, &axes, &is_dim_one]() -> int64_t {
- const auto& max_given_axis = axes.back();
- const auto after_max_given_axis_it = dims.begin() + max_given_axis + 1;
- const auto after_max_axis_it =
diff --git a/install-orttraining-files.diff b/install-orttraining-files.diff
new file mode 100644
index 000000000000..e95601fcd183
--- /dev/null
+++ b/install-orttraining-files.diff
@@ -0,0 +1,19 @@
+--- a/setup.py 2021-12-29 22:44:09.924917943 +0800
++++ b/setup.py 2021-12-29 22:49:16.216878004 +0800
+@@ -355,7 +355,7 @@
+ 'Operating System :: Microsoft :: Windows',
+ 'Operating System :: MacOS'])
+
+-if enable_training:
++if True:
+ packages.extend(['onnxruntime.training',
+ 'onnxruntime.training.amp',
+ 'onnxruntime.training.optim',
+@@ -373,6 +373,7 @@
+ package_data['onnxruntime.training.ortmodule.torch_cpp_extensions.cuda.torch_gpu_allocator'] = ['*.cc']
+ package_data['onnxruntime.training.ortmodule.torch_cpp_extensions.cuda.fused_ops'] = \
+ ['*.cpp', '*.cu', '*.cuh', '*.h']
++if enable_training:
+ requirements_file = "requirements-training.txt"
+ # with training, we want to follow this naming convention:
+ # stable: