diff options
Diffstat (limited to 'PKGBUILD')
-rw-r--r-- | PKGBUILD | 57 |
1 files changed, 37 insertions, 20 deletions
@@ -1,9 +1,11 @@ # Maintainer: Chih-Hsuan Yen <yan12125@gmail.com> +_ENABLE_CUDA=1 + pkgbase=python-onnxruntime # Not split DNNL EP to another package as it's needed unconditionally at runtime if built at compile time # https://github.com/microsoft/onnxruntime/blob/v1.9.1/onnxruntime/python/onnxruntime_pybind_state.cc#L533 -pkgname=(python-onnxruntime python-onnxruntime-cuda) +pkgname=(python-onnxruntime) pkgver=1.11.1 pkgdesc='Cross-platform, high performance scoring engine for ML models' pkgrel=1 @@ -11,7 +13,7 @@ arch=(x86_64) url='https://github.com/microsoft/onnxruntime' license=(MIT) depends=(nsync re2 python-flatbuffers python-numpy python-protobuf openmpi onednn libprotobuf-lite.so) -makedepends=(git cmake pybind11 python-setuptools nlohmann-json chrono-date boost eigen flatbuffers cuda cudnn nccl) +makedepends=(git cmake pybind11 python-setuptools nlohmann-json chrono-date boost eigen flatbuffers) optdepends=( # https://github.com/microsoft/onnxruntime/pull/9969 'python-onnx: for the backend API, quantization, orttraining, transformers and various tools' @@ -38,7 +40,6 @@ source=("git+https://github.com/microsoft/onnxruntime#tag=v$pkgver" "pytorch_cpuinfo::git+https://github.com/pytorch/cpuinfo.git" build-fixes.patch install-orttraining-files.diff - protobuf-3.20.diff system-dnnl.diff) sha512sums=('SKIP' 'SKIP' @@ -49,11 +50,15 @@ sha512sums=('SKIP' 'SKIP' '80ea85ea20bbbdec7991f965a66b627a5f42828bc0c72be0913078d927833a82402fb1af6c5c9f6ecae861b45582fa42c98ce83b02768e4bf875ab89dd1c607c' '06a002361cc324184d0bfcb520b472f57749c0537329f0e0dee833cc7fce2f08b14590b77bc0211422dfb933dbef6f249f19939f9e0df465c48ee8fc7827e31c' - '5b1b5c20efb2df48c651b957824d497e5465b2e572c9f12bf43546301ecc55f3ff5bb1004b491283a3957c18ff23220bad664dbcf6bcab9dc38cd77cdac30f6e' '6735c7aca2ba2f1f2a5286eb064125bf7f2c68a575d572dd157769d15778ff3e717b3a53d696c767748229f23ee6c3a7c82679df1d86283d7c4dd0ec9103ae08') # CUDA seems not working with LTO options+=('!lto') +if [[ $_ENABLE_CUDA = 1 ]]; then + pkgname+=(python-onnxruntime-cuda) + makedepends+=(cuda cudnn nccl gcc11) +fi + # Check PKGBUILDs of python-pytorch and tensorflow for CUDA architectures built by official packages _CUDA_ARCHITECTURES="52-real;53-real;60-real;61-real;62-real;70-real;72-real;75-real;80-real;86-real;86-virtual" @@ -62,9 +67,13 @@ prepare() { patch -Np1 -i ../build-fixes.patch patch -Np1 -i ../install-orttraining-files.diff - patch -Np1 -i ../protobuf-3.20.diff patch -Np1 -i ../system-dnnl.diff + # Protobuf 3.20 incompatibility https://github.com/microsoft/onnxruntime/pull/11639 + git cherry-pick -n 6aa286f1e3ece96a7326ea55fdcd225f1ff8bbf2 + # Fix building DNNL EP with GCC 12 https://github.com/microsoft/onnxruntime/pull/11667 + git cherry-pick -n 59ca05cb1c1de0492d10ac895904b217c86e612d + git submodule init for mod in onnx SafeInt tensorboard dlpack cxxopts pytorch_cpuinfo; do git config submodule.cmake/external/$mod.url "$srcdir"/$mod @@ -78,6 +87,12 @@ prepare() { build() { cd "$srcdir"/onnxruntime + if [[ $_ENABLE_CUDA = 1 ]]; then + export CC=/usr/bin/gcc-11 + export CXX=/usr/bin/g++-11 + export CUDAHOSTCXX=$CXX + fi + local cmake_args=( -DCMAKE_INSTALL_PREFIX=/usr -Donnxruntime_ENABLE_PYTHON=ON @@ -98,21 +113,23 @@ build() { -Donnxruntime_USE_FULL_PROTOBUF=OFF ) - # 1. Enable parallel builds for NVCC via -t0, which spawns multiple - # cicc and ptxas processes for each nvcc invocation. The number of - # total processes may be much larger than the number of cores - let - # the scheduler handle it. - cmake_args+=( - -DCMAKE_CUDA_FLAGS="-t0" - -DCMAKE_CUDA_ARCHITECTURES="$_CUDA_ARCHITECTURES" - -DCMAKE_CUDA_STANDARD_REQUIRED=ON - -DCMAKE_CXX_STANDARD_REQUIRED=ON - -Donnxruntime_USE_CUDA=ON - -Donnxruntime_CUDA_HOME=/opt/cuda - -DCMAKE_CUDA_COMPILER:PATH=/opt/cuda/bin/nvcc - -Donnxruntime_CUDNN_HOME=/usr - -Donnxruntime_USE_NCCL=ON - ) + if [[ $_ENABLE_CUDA = 1 ]]; then + # 1. Enable parallel builds for NVCC via -t0, which spawns multiple + # cicc and ptxas processes for each nvcc invocation. The number of + # total processes may be much larger than the number of cores - let + # the scheduler handle it. + cmake_args+=( + -DCMAKE_CUDA_FLAGS="-t0" + -DCMAKE_CUDA_ARCHITECTURES="$_CUDA_ARCHITECTURES" + -DCMAKE_CUDA_STANDARD_REQUIRED=ON + -DCMAKE_CXX_STANDARD_REQUIRED=ON + -Donnxruntime_USE_CUDA=ON + -Donnxruntime_CUDA_HOME=/opt/cuda + -DCMAKE_CUDA_COMPILER:PATH=/opt/cuda/bin/nvcc + -Donnxruntime_CUDNN_HOME=/usr + -Donnxruntime_USE_NCCL=ON + ) + fi cmake -B build -S cmake "${cmake_args[@]}" "$@" |