diff options
author | Chih-Hsuan Yen | 2021-12-31 14:06:49 +0800 |
---|---|---|
committer | Chih-Hsuan Yen | 2021-12-31 14:07:10 +0800 |
commit | 0185531906bda3a9aba93bbb0f3dcfeb0ae671ad (patch) | |
tree | c5101895f73be2e196ec65113b8dde92f72b256d | |
parent | a84b5454e84aed96aa78cc311c407d496b3855d6 (diff) | |
download | aur-0185531906bda3a9aba93bbb0f3dcfeb0ae671ad.tar.gz |
various fixes/improvements
* Switch back from clang to gcc. Apparently upstream tests more on gcc
than on clang, and there are several compatibility issues between
onnxruntime and clang [1,2] as well as cuda and clang [3]. On the
other hand, internal compiler errors from gcc have been fixed.
* Add more optional dependencies for several sub-packages, as motivated
by [4].
* Fix missing orttraining Python files, which was discovered while
  checking optional dependencies.
* Don't hard-code usage of GNU make, as suggested in [4].
[1] https://github.com/microsoft/onnxruntime/pull/10014
[2] https://github.com/microsoft/onnxruntime/pull/10160
[3] https://forums.developer.nvidia.com/t/building-with-clang-cuda-11-3-0-works-but-with-cuda-11-3-1-fails-regression/182176
[4] https://aur.archlinux.org/packages/python-onnxruntime/#comment-843401
-rw-r--r-- | .SRCINFO | 18 | ||||
-rw-r--r-- | PKGBUILD | 63 | ||||
-rw-r--r-- | clang.patch | 22 | ||||
-rw-r--r-- | install-orttraining-files.diff | 19 |
4 files changed, 56 insertions, 66 deletions
@@ -17,7 +17,6 @@ pkgbase = python-onnxruntime makedepends = cuda makedepends = cudnn makedepends = nccl - makedepends = clang depends = nsync depends = re2 depends = python-flatbuffers @@ -25,7 +24,18 @@ pkgbase = python-onnxruntime depends = python-protobuf depends = openmpi depends = onednn - optdepends = python-onnx: for the backend API and transformers + optdepends = python-onnx: for the backend API, quantization, orttraining, transformers and various tools + optdepends = python-coloredlogs: for transformers + optdepends = python-psutil: for transformers + optdepends = python-py-cpuinfo: for transformers + optdepends = python-py3nvml: for transformers + optdepends = python-packaging: for transformers and various tools + optdepends = python-transformers: for transformers + optdepends = python-scipy: for transformers and various tools + optdepends = python-pytorch: for transformers, orttraining and various tools + optdepends = python-cerberus: for orttraining + optdepends = python-h5py: for orttraining + optdepends = python-sympy: for transformers and various tools options = !lto source = git+https://github.com/microsoft/onnxruntime#tag=v1.10.0 source = git+https://github.com/onnx/onnx.git @@ -35,7 +45,7 @@ pkgbase = python-onnxruntime source = git+https://github.com/jarro2783/cxxopts.git source = pytorch_cpuinfo::git+https://github.com/pytorch/cpuinfo.git source = build-fixes.patch - source = clang.patch + source = install-orttraining-files.diff source = system-dnnl.diff sha512sums = SKIP sha512sums = SKIP @@ -45,7 +55,7 @@ pkgbase = python-onnxruntime sha512sums = SKIP sha512sums = SKIP sha512sums = 80ea85ea20bbbdec7991f965a66b627a5f42828bc0c72be0913078d927833a82402fb1af6c5c9f6ecae861b45582fa42c98ce83b02768e4bf875ab89dd1c607c - sha512sums = ad94af8bb25744b244c4f82e9a06189741f82b295a88523ca0e8005568fac710c2299d783989457e9cf96ef8da0593fb4f70c8792d416f44ab29d6493e204f13 + sha512sums = 
06a002361cc324184d0bfcb520b472f57749c0537329f0e0dee833cc7fce2f08b14590b77bc0211422dfb933dbef6f249f19939f9e0df465c48ee8fc7827e31c sha512sums = 6735c7aca2ba2f1f2a5286eb064125bf7f2c68a575d572dd157769d15778ff3e717b3a53d696c767748229f23ee6c3a7c82679df1d86283d7c4dd0ec9103ae08 pkgname = python-onnxruntime @@ -11,10 +11,21 @@ arch=(x86_64) url='https://github.com/microsoft/onnxruntime' license=(MIT) depends=(nsync re2 python-flatbuffers python-numpy python-protobuf openmpi onednn) -makedepends=(git cmake pybind11 python-setuptools nlohmann-json chrono-date boost eigen flatbuffers cuda cudnn nccl clang) +makedepends=(git cmake pybind11 python-setuptools nlohmann-json chrono-date boost eigen flatbuffers cuda cudnn nccl) optdepends=( # https://github.com/microsoft/onnxruntime/pull/9969 - 'python-onnx: for the backend API and transformers' + 'python-onnx: for the backend API, quantization, orttraining, transformers and various tools' + 'python-coloredlogs: for transformers' # also used by TensorRT tools, but we don't build for it, anyway + 'python-psutil: for transformers' + 'python-py-cpuinfo: for transformers' + 'python-py3nvml: for transformers' + 'python-packaging: for transformers and various tools' + 'python-transformers: for transformers' + 'python-scipy: for transformers and various tools' + 'python-pytorch: for transformers, orttraining and various tools' + 'python-cerberus: for orttraining' + 'python-h5py: for orttraining' + 'python-sympy: for transformers and various tools' ) # not de-vendored libraries # onnx: needs shared libonnx (https://github.com/onnx/onnx/issues/3030) @@ -26,7 +37,7 @@ source=("git+https://github.com/microsoft/onnxruntime#tag=v$pkgver" "git+https://github.com/jarro2783/cxxopts.git" "pytorch_cpuinfo::git+https://github.com/pytorch/cpuinfo.git" build-fixes.patch - clang.patch + install-orttraining-files.diff system-dnnl.diff) sha512sums=('SKIP' 'SKIP' @@ -36,7 +47,7 @@ sha512sums=('SKIP' 'SKIP' 'SKIP' 
'80ea85ea20bbbdec7991f965a66b627a5f42828bc0c72be0913078d927833a82402fb1af6c5c9f6ecae861b45582fa42c98ce83b02768e4bf875ab89dd1c607c' - 'ad94af8bb25744b244c4f82e9a06189741f82b295a88523ca0e8005568fac710c2299d783989457e9cf96ef8da0593fb4f70c8792d416f44ab29d6493e204f13' + '06a002361cc324184d0bfcb520b472f57749c0537329f0e0dee833cc7fce2f08b14590b77bc0211422dfb933dbef6f249f19939f9e0df465c48ee8fc7827e31c' '6735c7aca2ba2f1f2a5286eb064125bf7f2c68a575d572dd157769d15778ff3e717b3a53d696c767748229f23ee6c3a7c82679df1d86283d7c4dd0ec9103ae08') # CUDA seems not working with LTO options+=('!lto') @@ -48,10 +59,8 @@ prepare() { cd onnxruntime patch -Np1 -i ../build-fixes.patch - patch -Np1 -i ../clang.patch + patch -Np1 -i ../install-orttraining-files.diff patch -Np1 -i ../system-dnnl.diff - # Fix building DNNL EP with clang https://github.com/microsoft/onnxruntime/pull/10014 - git cherry-pick -n c2d08a877b1f661eb99a29a57fd4184aa0918a80 git submodule init for mod in onnx SafeInt tensorboard dlpack cxxopts pytorch_cpuinfo; do @@ -83,36 +92,15 @@ build() { -Donnxruntime_USE_FULL_PROTOBUF=OFF ) - # 1. Redefine ___is_signed to ___is_signed to workaround a regression - # from CUDA 11.3 -> 11.3.1 [1]. - # 2. Enable parallel builds for NVCC via -t0, which spawns multiple + # 1. Enable parallel builds for NVCC via -t0, which spawns multiple # cicc and ptxas processes for each nvcc invocation. The number of # total processes may be much larger than the number of cores - let # the scheduler handle it. - # 3. Work-around the "error: type-id cannot have a name" issue with - # -DCMAKE_CUDA_STANDARD_REQUIRED=ON, which forces -std= to be - # specified [2]. 
- # - # $ echo "#include <type_traits>" | nvcc -ccbin /usr/bin/clang -x cu -c - -o /dev/null -v --keep - # /usr/bin/../lib64/gcc/x86_64-pc-linux-gnu/11.1.0/../../../../include/c++/11.1.0/type_traits:591:162: error: type-id cannot have a name - # template< class _Tp> using __is_signed_integer = __is_one_of< __remove_cv_t< _Tp> , signed char, signed short, signed int, signed long, signed long long, signed __int128_t> ; - # ^ - # 1 error generated. - # - # It is a clang bug exposed by CMake and CUDA. Since CMake 3.22, - # -std= flag is no longer specified to nvcc (related to - # CMP0128 [3] ?). On the other hand, when no -std= option is - # specified, clang defines -D__GLIBCXX_TYPE_INT_N_0=__int128, and - # cudafe++ somehow replaces __int128 with __int128_t, which does - # not work with signed/unsigned in clang. - # [1] https://forums.developer.nvidia.com/t/182176 - # [2] https://cmake.org/cmake/help/latest/prop_tgt/LANG_STANDARD_REQUIRED.html - # [3] https://cmake.org/cmake/help/latest/policy/CMP0128.html cmake_args+=( - -DCMAKE_CUDA_HOST_COMPILER=/usr/bin/clang - -DCMAKE_CUDA_FLAGS="-D__is_signed=___is_signed -t0" + -DCMAKE_CUDA_FLAGS="-t0" -DCMAKE_CUDA_ARCHITECTURES="$_CUDA_ARCHITECTURES" -DCMAKE_CUDA_STANDARD_REQUIRED=ON + -DCMAKE_CXX_STANDARD_REQUIRED=ON -Donnxruntime_USE_CUDA=ON -Donnxruntime_CUDA_HOME=/opt/cuda -DCMAKE_CUDA_COMPILER:PATH=/opt/cuda/bin/nvcc @@ -120,22 +108,17 @@ build() { -Donnxruntime_USE_NCCL=ON ) - # Use clang as GCC does not work. GCC 11 crashes with internal - # compiler errors. GCC 10 does not work as some dependent packages - # (ex: re2) are built with libstdc++ from GCC 11, and thus linking - # onnxruntime with libstdc++ 10 fails. - CC=/usr/bin/clang CXX=/usr/bin/clang++ \ - cmake -B build -S cmake "${cmake_args[@]}" "$@" + cmake -B build -S cmake "${cmake_args[@]}" "$@" cd build - make + cmake --build . 
python ../setup.py build } package_python-onnxruntime() { cd onnxruntime/build - make install DESTDIR="$pkgdir" + DESTDIR="$pkgdir" cmake --install . python ../setup.py install --root="$pkgdir" --skip-build --optimize=1 @@ -144,7 +127,7 @@ package_python-onnxruntime() { for f in LICENSE ThirdPartyNotices.txt ; do ln -s "$PY_ORT_DIR/$f" "$pkgdir"/usr/share/licenses/$pkgname/$f done - # already installed by `make install`, and not useful as this path is not looked up by the linker + # already installed by `cmake --install`, and not useful as this path is not looked up by the linker rm -vf "$pkgdir/$PY_ORT_DIR"/capi/libonnxruntime_providers_* # installed as split packages diff --git a/clang.patch b/clang.patch deleted file mode 100644 index ab6bb90fa315..000000000000 --- a/clang.patch +++ /dev/null @@ -1,22 +0,0 @@ -diff --git a/onnxruntime/core/providers/cuda/reduction/reduction_functions.cc b/onnxruntime/core/providers/cuda/reduction/reduction_functions.cc -index 955df6d9a..f9fd53e15 100644 ---- a/onnxruntime/core/providers/cuda/reduction/reduction_functions.cc -+++ b/onnxruntime/core/providers/cuda/reduction/reduction_functions.cc -@@ -39,7 +39,7 @@ optional<std::pair<int64_t, int64_t>> GetMinAndMaxContiguousAxes( - } - - // normalize axis values and sort -- const std::vector<int64_t> axes = [&original_axes, rank]() { -+ const std::vector<int64_t> axes = [&original_axes, rank]() -> std::vector<int64_t> { - std::vector<int64_t> result(original_axes); - std::for_each( - result.begin(), result.end(), -@@ -85,7 +85,7 @@ optional<std::pair<int64_t, int64_t>> GetMinAndMaxContiguousAxes( - return std::distance(dims.begin(), before_min_axis_rit.base()); - }(); - -- const int64_t max_axis = [&dims, &axes, &is_dim_one]() { -+ const int64_t max_axis = [&dims, &axes, &is_dim_one]() -> int64_t { - const auto& max_given_axis = axes.back(); - const auto after_max_given_axis_it = dims.begin() + max_given_axis + 1; - const auto after_max_axis_it = diff --git 
a/install-orttraining-files.diff b/install-orttraining-files.diff new file mode 100644 index 000000000000..e95601fcd183 --- /dev/null +++ b/install-orttraining-files.diff @@ -0,0 +1,19 @@ +--- a/setup.py 2021-12-29 22:44:09.924917943 +0800 ++++ b/setup.py 2021-12-29 22:49:16.216878004 +0800 +@@ -355,7 +355,7 @@ + 'Operating System :: Microsoft :: Windows', + 'Operating System :: MacOS']) + +-if enable_training: ++if True: + packages.extend(['onnxruntime.training', + 'onnxruntime.training.amp', + 'onnxruntime.training.optim', +@@ -373,6 +373,7 @@ + package_data['onnxruntime.training.ortmodule.torch_cpp_extensions.cuda.torch_gpu_allocator'] = ['*.cc'] + package_data['onnxruntime.training.ortmodule.torch_cpp_extensions.cuda.fused_ops'] = \ + ['*.cpp', '*.cu', '*.cuh', '*.h'] ++if enable_training: + requirements_file = "requirements-training.txt" + # with training, we want to follow this naming convention: + # stable: |