diff options
author | Chih-Hsuan Yen | 2022-05-30 01:05:56 +0800 |
---|---|---|
committer | Chih-Hsuan Yen | 2022-05-30 01:07:43 +0800 |
commit | 8d347bed37e512e7a9bdfbeea35d5cb47fcfb4a9 (patch) | |
tree | f2c583166c1824a648a670607d3be3b7dc45e280 | |
parent | 7d7cb94685203169a7ae660620b35ed0bb484e26 (diff) | |
download | aur-8d347bed37e512e7a9bdfbeea35d5cb47fcfb4a9.tar.gz |
misc improvements
* Make CUDA build optional as requested [1]
* Use a better fix for protobuf 3.20 compatibility
* Fix GCC 12 build errors
[1] https://aur.archlinux.org/pkgbase/python-onnxruntime#comment-858912
-rw-r--r-- | .SRCINFO | 3 | ||||
-rw-r--r-- | PKGBUILD | 57 | ||||
-rw-r--r-- | protobuf-3.20.diff | 13 |
3 files changed, 38 insertions(+), 35 deletions(-)
@@ -17,6 +17,7 @@ pkgbase = python-onnxruntime makedepends = cuda makedepends = cudnn makedepends = nccl + makedepends = gcc11 depends = nsync depends = re2 depends = python-flatbuffers @@ -47,7 +48,6 @@ pkgbase = python-onnxruntime source = pytorch_cpuinfo::git+https://github.com/pytorch/cpuinfo.git source = build-fixes.patch source = install-orttraining-files.diff - source = protobuf-3.20.diff source = system-dnnl.diff sha512sums = SKIP sha512sums = SKIP @@ -58,7 +58,6 @@ pkgbase = python-onnxruntime sha512sums = SKIP sha512sums = 80ea85ea20bbbdec7991f965a66b627a5f42828bc0c72be0913078d927833a82402fb1af6c5c9f6ecae861b45582fa42c98ce83b02768e4bf875ab89dd1c607c sha512sums = 06a002361cc324184d0bfcb520b472f57749c0537329f0e0dee833cc7fce2f08b14590b77bc0211422dfb933dbef6f249f19939f9e0df465c48ee8fc7827e31c - sha512sums = 5b1b5c20efb2df48c651b957824d497e5465b2e572c9f12bf43546301ecc55f3ff5bb1004b491283a3957c18ff23220bad664dbcf6bcab9dc38cd77cdac30f6e sha512sums = 6735c7aca2ba2f1f2a5286eb064125bf7f2c68a575d572dd157769d15778ff3e717b3a53d696c767748229f23ee6c3a7c82679df1d86283d7c4dd0ec9103ae08 pkgname = python-onnxruntime @@ -1,9 +1,11 @@ # Maintainer: Chih-Hsuan Yen <yan12125@gmail.com> +_ENABLE_CUDA=1 + pkgbase=python-onnxruntime # Not split DNNL EP to another package as it's needed unconditionally at runtime if built at compile time # https://github.com/microsoft/onnxruntime/blob/v1.9.1/onnxruntime/python/onnxruntime_pybind_state.cc#L533 -pkgname=(python-onnxruntime python-onnxruntime-cuda) +pkgname=(python-onnxruntime) pkgver=1.11.1 pkgdesc='Cross-platform, high performance scoring engine for ML models' pkgrel=1 @@ -11,7 +13,7 @@ arch=(x86_64) url='https://github.com/microsoft/onnxruntime' license=(MIT) depends=(nsync re2 python-flatbuffers python-numpy python-protobuf openmpi onednn libprotobuf-lite.so) -makedepends=(git cmake pybind11 python-setuptools nlohmann-json chrono-date boost eigen flatbuffers cuda cudnn nccl) +makedepends=(git cmake pybind11 python-setuptools 
nlohmann-json chrono-date boost eigen flatbuffers) optdepends=( # https://github.com/microsoft/onnxruntime/pull/9969 'python-onnx: for the backend API, quantization, orttraining, transformers and various tools' @@ -38,7 +40,6 @@ source=("git+https://github.com/microsoft/onnxruntime#tag=v$pkgver" "pytorch_cpuinfo::git+https://github.com/pytorch/cpuinfo.git" build-fixes.patch install-orttraining-files.diff - protobuf-3.20.diff system-dnnl.diff) sha512sums=('SKIP' 'SKIP' @@ -49,11 +50,15 @@ sha512sums=('SKIP' 'SKIP' '80ea85ea20bbbdec7991f965a66b627a5f42828bc0c72be0913078d927833a82402fb1af6c5c9f6ecae861b45582fa42c98ce83b02768e4bf875ab89dd1c607c' '06a002361cc324184d0bfcb520b472f57749c0537329f0e0dee833cc7fce2f08b14590b77bc0211422dfb933dbef6f249f19939f9e0df465c48ee8fc7827e31c' - '5b1b5c20efb2df48c651b957824d497e5465b2e572c9f12bf43546301ecc55f3ff5bb1004b491283a3957c18ff23220bad664dbcf6bcab9dc38cd77cdac30f6e' '6735c7aca2ba2f1f2a5286eb064125bf7f2c68a575d572dd157769d15778ff3e717b3a53d696c767748229f23ee6c3a7c82679df1d86283d7c4dd0ec9103ae08') # CUDA seems not working with LTO options+=('!lto') +if [[ $_ENABLE_CUDA = 1 ]]; then + pkgname+=(python-onnxruntime-cuda) + makedepends+=(cuda cudnn nccl gcc11) +fi + # Check PKGBUILDs of python-pytorch and tensorflow for CUDA architectures built by official packages _CUDA_ARCHITECTURES="52-real;53-real;60-real;61-real;62-real;70-real;72-real;75-real;80-real;86-real;86-virtual" @@ -62,9 +67,13 @@ prepare() { patch -Np1 -i ../build-fixes.patch patch -Np1 -i ../install-orttraining-files.diff - patch -Np1 -i ../protobuf-3.20.diff patch -Np1 -i ../system-dnnl.diff + # Protobuf 3.20 incompatibility https://github.com/microsoft/onnxruntime/pull/11639 + git cherry-pick -n 6aa286f1e3ece96a7326ea55fdcd225f1ff8bbf2 + # Fix building DNNL EP with GCC 12 https://github.com/microsoft/onnxruntime/pull/11667 + git cherry-pick -n 59ca05cb1c1de0492d10ac895904b217c86e612d + git submodule init for mod in onnx SafeInt tensorboard dlpack cxxopts 
pytorch_cpuinfo; do git config submodule.cmake/external/$mod.url "$srcdir"/$mod @@ -78,6 +87,12 @@ prepare() { build() { cd "$srcdir"/onnxruntime + if [[ $_ENABLE_CUDA = 1 ]]; then + export CC=/usr/bin/gcc-11 + export CXX=/usr/bin/g++-11 + export CUDAHOSTCXX=$CXX + fi + local cmake_args=( -DCMAKE_INSTALL_PREFIX=/usr -Donnxruntime_ENABLE_PYTHON=ON @@ -98,21 +113,23 @@ build() { -Donnxruntime_USE_FULL_PROTOBUF=OFF ) - # 1. Enable parallel builds for NVCC via -t0, which spawns multiple - # cicc and ptxas processes for each nvcc invocation. The number of - # total processes may be much larger than the number of cores - let - # the scheduler handle it. - cmake_args+=( - -DCMAKE_CUDA_FLAGS="-t0" - -DCMAKE_CUDA_ARCHITECTURES="$_CUDA_ARCHITECTURES" - -DCMAKE_CUDA_STANDARD_REQUIRED=ON - -DCMAKE_CXX_STANDARD_REQUIRED=ON - -Donnxruntime_USE_CUDA=ON - -Donnxruntime_CUDA_HOME=/opt/cuda - -DCMAKE_CUDA_COMPILER:PATH=/opt/cuda/bin/nvcc - -Donnxruntime_CUDNN_HOME=/usr - -Donnxruntime_USE_NCCL=ON - ) + if [[ $_ENABLE_CUDA = 1 ]]; then + # 1. Enable parallel builds for NVCC via -t0, which spawns multiple + # cicc and ptxas processes for each nvcc invocation. The number of + # total processes may be much larger than the number of cores - let + # the scheduler handle it. 
+ cmake_args+=( + -DCMAKE_CUDA_FLAGS="-t0" + -DCMAKE_CUDA_ARCHITECTURES="$_CUDA_ARCHITECTURES" + -DCMAKE_CUDA_STANDARD_REQUIRED=ON + -DCMAKE_CXX_STANDARD_REQUIRED=ON + -Donnxruntime_USE_CUDA=ON + -Donnxruntime_CUDA_HOME=/opt/cuda + -DCMAKE_CUDA_COMPILER:PATH=/opt/cuda/bin/nvcc + -Donnxruntime_CUDNN_HOME=/usr + -Donnxruntime_USE_NCCL=ON + ) + fi cmake -B build -S cmake "${cmake_args[@]}" "$@" diff --git a/protobuf-3.20.diff b/protobuf-3.20.diff deleted file mode 100644 index 908f06e14105..000000000000 --- a/protobuf-3.20.diff +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc -index 085c1ffbe..a9070d5e4 100644 ---- a/onnxruntime/core/framework/tensorprotoutils.cc -+++ b/onnxruntime/core/framework/tensorprotoutils.cc -@@ -256,7 +256,7 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d - ") does not match the data size(", tensor.field_size(), ") in proto"); \ - auto& data = tensor.field_name(); \ - for (auto data_iter = data.cbegin(); data_iter != data.cend(); ++data_iter) \ -- *p_data++ = *reinterpret_cast<const T*>(data_iter); \ -+ *p_data++ = *data_iter; \ - return Status::OK(); \ - } - |