diff options
author | Chih-Hsuan Yen | 2022-05-30 01:05:56 +0800 |
---|---|---|
committer | Chih-Hsuan Yen | 2022-05-30 01:07:43 +0800 |
commit | 8d347bed37e512e7a9bdfbeea35d5cb47fcfb4a9 (patch) | |
tree | f2c583166c1824a648a670607d3be3b7dc45e280 | |
parent | 7d7cb94685203169a7ae660620b35ed0bb484e26 (diff) | |
download | aur-8d347bed37e512e7a9bdfbeea35d5cb47fcfb4a9.tar.gz |
misc improvements
* Make CUDA build optional as requested [1]
* Use a better fix for protobuf 3.20 compatibility
* Fix GCC 12 build errors
[1] https://aur.archlinux.org/pkgbase/python-onnxruntime#comment-858912
-rw-r--r-- | .SRCINFO | 3 | ||||
-rw-r--r-- | PKGBUILD | 57 | ||||
-rw-r--r-- | protobuf-3.20.diff | 13 |
3 files changed, 38 insertions(+), 35 deletions(-)
@@ -17,6 +17,7 @@ pkgbase = python-onnxruntime makedepends = cuda makedepends = cudnn makedepends = nccl + makedepends = gcc11 depends = nsync depends = re2 depends = python-flatbuffers @@ -47,7 +48,6 @@ pkgbase = python-onnxruntime source = pytorch_cpuinfo::git+https://github.com/pytorch/cpuinfo.git source = build-fixes.patch source = install-orttraining-files.diff - source = protobuf-3.20.diff source = system-dnnl.diff sha512sums = SKIP sha512sums = SKIP @@ -58,7 +58,6 @@ pkgbase = python-onnxruntime sha512sums = SKIP sha512sums = 80ea85ea20bbbdec7991f965a66b627a5f42828bc0c72be0913078d927833a82402fb1af6c5c9f6ecae861b45582fa42c98ce83b02768e4bf875ab89dd1c607c sha512sums = 06a002361cc324184d0bfcb520b472f57749c0537329f0e0dee833cc7fce2f08b14590b77bc0211422dfb933dbef6f249f19939f9e0df465c48ee8fc7827e31c - sha512sums = 5b1b5c20efb2df48c651b957824d497e5465b2e572c9f12bf43546301ecc55f3ff5bb1004b491283a3957c18ff23220bad664dbcf6bcab9dc38cd77cdac30f6e sha512sums = 6735c7aca2ba2f1f2a5286eb064125bf7f2c68a575d572dd157769d15778ff3e717b3a53d696c767748229f23ee6c3a7c82679df1d86283d7c4dd0ec9103ae08 pkgname = python-onnxruntime @@ -1,9 +1,11 @@ # Maintainer: Chih-Hsuan Yen <yan12125@gmail.com> +_ENABLE_CUDA=1 + pkgbase=python-onnxruntime # Not split DNNL EP to another package as it's needed unconditionally at runtime if built at compile time # https://github.com/microsoft/onnxruntime/blob/v1.9.1/onnxruntime/python/onnxruntime_pybind_state.cc#L533 -pkgname=(python-onnxruntime python-onnxruntime-cuda) +pkgname=(python-onnxruntime) pkgver=1.11.1 pkgdesc='Cross-platform, high performance scoring engine for ML models' pkgrel=1 @@ -11,7 +13,7 @@ arch=(x86_64) url='https://github.com/microsoft/onnxruntime' license=(MIT) depends=(nsync re2 python-flatbuffers python-numpy python-protobuf openmpi onednn libprotobuf-lite.so) -makedepends=(git cmake pybind11 python-setuptools nlohmann-json chrono-date boost eigen flatbuffers cuda cudnn nccl) +makedepends=(git cmake pybind11 python-setuptools 
nlohmann-json chrono-date boost eigen flatbuffers) optdepends=( # https://github.com/microsoft/onnxruntime/pull/9969 'python-onnx: for the backend API, quantization, orttraining, transformers and various tools' @@ -38,7 +40,6 @@ source=("git+https://github.com/microsoft/onnxruntime#tag=v$pkgver" "pytorch_cpuinfo::git+https://github.com/pytorch/cpuinfo.git" build-fixes.patch install-orttraining-files.diff - protobuf-3.20.diff system-dnnl.diff) sha512sums=('SKIP' 'SKIP' @@ -49,11 +50,15 @@ sha512sums=('SKIP' 'SKIP' '80ea85ea20bbbdec7991f965a66b627a5f42828bc0c72be0913078d927833a82402fb1af6c5c9f6ecae861b45582fa42c98ce83b02768e4bf875ab89dd1c607c' '06a002361cc324184d0bfcb520b472f57749c0537329f0e0dee833cc7fce2f08b14590b77bc0211422dfb933dbef6f249f19939f9e0df465c48ee8fc7827e31c' - '5b1b5c20efb2df48c651b957824d497e5465b2e572c9f12bf43546301ecc55f3ff5bb1004b491283a3957c18ff23220bad664dbcf6bcab9dc38cd77cdac30f6e' '6735c7aca2ba2f1f2a5286eb064125bf7f2c68a575d572dd157769d15778ff3e717b3a53d696c767748229f23ee6c3a7c82679df1d86283d7c4dd0ec9103ae08') # CUDA seems not working with LTO options+=('!lto') +if [[ $_ENABLE_CUDA = 1 ]]; then + pkgname+=(python-onnxruntime-cuda) + makedepends+=(cuda cudnn nccl gcc11) +fi + # Check PKGBUILDs of python-pytorch and tensorflow for CUDA architectures built by official packages _CUDA_ARCHITECTURES="52-real;53-real;60-real;61-real;62-real;70-real;72-real;75-real;80-real;86-real;86-virtual" @@ -62,9 +67,13 @@ prepare() { patch -Np1 -i ../build-fixes.patch patch -Np1 -i ../install-orttraining-files.diff - patch -Np1 -i ../protobuf-3.20.diff patch -Np1 -i ../system-dnnl.diff + # Protobuf 3.20 incompatibility https://github.com/microsoft/onnxruntime/pull/11639 + git cherry-pick -n 6aa286f1e3ece96a7326ea55fdcd225f1ff8bbf2 + # Fix building DNNL EP with GCC 12 https://github.com/microsoft/onnxruntime/pull/11667 + git cherry-pick -n 59ca05cb1c1de0492d10ac895904b217c86e612d + git submodule init for mod in onnx SafeInt tensorboard dlpack cxxopts 
pytorch_cpuinfo; do git config submodule.cmake/external/$mod.url "$srcdir"/$mod @@ -78,6 +87,12 @@ prepare() { build() { cd "$srcdir"/onnxruntime + if [[ $_ENABLE_CUDA = 1 ]]; then + export CC=/usr/bin/gcc-11 + export CXX=/usr/bin/g++-11 + export CUDAHOSTCXX=$CXX + fi + local cmake_args=( -DCMAKE_INSTALL_PREFIX=/usr -Donnxruntime_ENABLE_PYTHON=ON @@ -98,21 +113,23 @@ build() { -Donnxruntime_USE_FULL_PROTOBUF=OFF ) - # 1. Enable parallel builds for NVCC via -t0, which spawns multiple - # cicc and ptxas processes for each nvcc invocation. The number of - # total processes may be much larger than the number of cores - let - # the scheduler handle it. - cmake_args+=( - -DCMAKE_CUDA_FLAGS="-t0" - -DCMAKE_CUDA_ARCHITECTURES="$_CUDA_ARCHITECTURES" - -DCMAKE_CUDA_STANDARD_REQUIRED=ON - -DCMAKE_CXX_STANDARD_REQUIRED=ON - -Donnxruntime_USE_CUDA=ON - -Donnxruntime_CUDA_HOME=/opt/cuda - -DCMAKE_CUDA_COMPILER:PATH=/opt/cuda/bin/nvcc - -Donnxruntime_CUDNN_HOME=/usr - -Donnxruntime_USE_NCCL=ON - ) + if [[ $_ENABLE_CUDA = 1 ]]; then + # 1. Enable parallel builds for NVCC via -t0, which spawns multiple + # cicc and ptxas processes for each nvcc invocation. The number of + # total processes may be much larger than the number of cores - let + # the scheduler handle it. 
+ cmake_args+=( + -DCMAKE_CUDA_FLAGS="-t0" + -DCMAKE_CUDA_ARCHITECTURES="$_CUDA_ARCHITECTURES" + -DCMAKE_CUDA_STANDARD_REQUIRED=ON + -DCMAKE_CXX_STANDARD_REQUIRED=ON + -Donnxruntime_USE_CUDA=ON + -Donnxruntime_CUDA_HOME=/opt/cuda + -DCMAKE_CUDA_COMPILER:PATH=/opt/cuda/bin/nvcc + -Donnxruntime_CUDNN_HOME=/usr + -Donnxruntime_USE_NCCL=ON + ) + fi cmake -B build -S cmake "${cmake_args[@]}" "$@" diff --git a/protobuf-3.20.diff b/protobuf-3.20.diff deleted file mode 100644 index 908f06e14105..000000000000 --- a/protobuf-3.20.diff +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc -index 085c1ffbe..a9070d5e4 100644 ---- a/onnxruntime/core/framework/tensorprotoutils.cc -+++ b/onnxruntime/core/framework/tensorprotoutils.cc -@@ -256,7 +256,7 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d - ") does not match the data size(", tensor.field_size(), ") in proto"); \ - auto& data = tensor.field_name(); \ - for (auto data_iter = data.cbegin(); data_iter != data.cend(); ++data_iter) \ -- *p_data++ = *reinterpret_cast<const T*>(data_iter); \ -+ *p_data++ = *data_iter; \ - return Status::OK(); \ - } - |