# File: PKGBUILD (repository path: root/PKGBUILD)
# Blob: 3e5b9b6f3c536268bc50c010579bb787a6ad029a
# Maintainer: Daniel Bershatsky <bepshatsky@yandex.ru>

# Split-package metadata: one shared base name, two output packages (the C++
# libraries and the Python bindings).
pkgbase="apache-arrow"
pkgname=(
    "apache-arrow"
    "python-apache-arrow"
)
pkgver='5.0.0'
pkgrel='4'
pkgdesc='Language-independent columnar memory format for flat and hierarchical data'
arch=("x86_64")
url="https://arrow.apache.org/"
license=("Apache")

# Runtime dependencies, grouped by the role they play.
depends=(
    # General
    "boost-libs" "libutf8proc" "re2"
    # Data serialisation
    "thrift" "rapidjson"
    # Compression codecs
    "brotli" "bzip2" "lz4" "snappy" "zlib" "zstd"
)

# Arrow uses a vendored jemalloc built with a custom prefix. jemalloc has no
# build system generator like CMake, so both Make and Ninja are needed: Make
# is used while building jemalloc, and Ninja is used for everything else to
# speed up compilation.
#
# Also, to avoid redundant work, PyArrow is built into a wheel once and that
# wheel is then installed into the fake root.
# Build-time-only dependencies: toolchain and build drivers first, then the
# Python packaging pieces used to produce and install the PyArrow wheel.
makedepends=(
    "boost" "cmake" "cython"
    "gcc" "gcc-libs"
    "make" "ninja"
    "python-numpy" "python-pip" "python-setuptools-scm" "python-wheel"
)

# Upstream release tarball, addressed by the GitHub tag "<pkgbase>-<pkgver>".
source=(
    "https://github.com/apache/arrow/archive/${pkgbase}-${pkgver}.tar.gz"
)
md5sums=(
    "9caf5dbd36ef4972c3a591bcfeaf59c8"
)

# Configure, compile and install the Arrow C++ libraries into a staging prefix
# inside the source tree, then build the PyArrow wheel from the python/
# directory.
#
# Reads:   srcdir, pkgbase, pkgver
# Exports: ARROW_HOME (the staging prefix) and LD_LIBRARY_PATH (with the
#          staging lib directory prepended).
build(){
    local src_root="$srcdir/arrow-$pkgbase-$pkgver"

    export ARROW_HOME="$src_root/dist"
    # Append the previous value only when it is non-empty. An unconditional
    # ":$LD_LIBRARY_PATH" would leave a trailing ':' when the variable is
    # unset, and the dynamic loader treats an empty entry as the current
    # working directory.
    export LD_LIBRARY_PATH="$ARROW_HOME/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

    mkdir -p "$src_root/cpp/build"
    cd "$src_root/cpp/build"

    # Setting -DARROW_PYTHON=ON implies
    # -DARROW_{COMPUTE,CSV,DATASET,FILESYSTEM,HDFS,JSON}=ON.
    # The install prefix is quoted so that a build directory containing
    # whitespace is still passed to CMake as a single argument.
    cmake -DCMAKE_INSTALL_PREFIX="$ARROW_HOME" \
          -DCMAKE_INSTALL_LIBDIR=lib \
          -DCMAKE_BUILD_TYPE=Release \
          -DARROW_SIMD_LEVEL=AVX2 \
          -DARROW_BUILD_STATIC=OFF \
          -DARROW_DEPENDENCY_SOURCE=SYSTEM \
          -DARROW_BUILD_UTILITIES=ON \
          -DARROW_COMPUTE=ON \
          -DARROW_CSV=ON \
          -DARROW_CUDA=OFF \
          -DARROW_DATASET=ON \
          -DARROW_FILESYSTEM=ON \
          -DARROW_FLIGHT=OFF \
          -DARROW_GANDIVA=OFF \
          -DARROW_GANDIVA_JAVA=OFF \
          -DARROW_HDFS=OFF \
          -DARROW_HIVESERVER2=OFF \
          -DARROW_IPC=ON \
          -DARROW_JSON=ON \
          -DARROW_ORC=OFF \
          -DARROW_PARQUET=ON \
          -DARROW_PLASMA=OFF \
          -DARROW_PLASMA_JAVA_CLIENT=OFF \
          -DARROW_PYTHON=ON \
          -DARROW_S3=OFF \
          -DARROW_WITH_BZ2=ON \
          -DARROW_WITH_ZLIB=ON \
          -DARROW_WITH_LZ4=ON \
          -DARROW_WITH_SNAPPY=ON \
          -DARROW_WITH_ZSTD=ON \
          -DARROW_WITH_BROTLI=ON \
          -DPython3_EXECUTABLE=/usr/bin/python3 \
          -G Ninja \
          ..
    ninja
    ninja install

    # Build the Python extension and pack it into a wheel under python/dist.
    cd "$src_root/python"
    python setup.py build_ext --with-parquet --cmake-generator Ninja bdist_wheel
}

# Package the Arrow C++ libraries: re-configure the existing build tree for
# the final /usr prefix and install it into the package root.
#
# Reads: srcdir, pkgbase, pkgver, pkgdir
# Sets:  provides, conflicts
package_apache-arrow() {
    provides=('arrow' 'parquet-cpp')
    conflicts=('arrow' 'parquet-cpp')

    cd "$srcdir/arrow-$pkgbase-$pkgver/cpp/build"
    mkdir -p "$pkgdir"
    # Configure with the real run-time prefix and stage through DESTDIR.
    # Using "$pkgdir/usr" as the install prefix would record the temporary
    # packaging directory in every file generated from the prefix, and the
    # unquoted expansion would break on paths containing whitespace.
    cmake -DCMAKE_INSTALL_PREFIX=/usr ..
    DESTDIR="$pkgdir" ninja install
    # Drop anything under usr/lib whose name contains "testing".
    find "$pkgdir/usr/lib/" -name "*testing*" -delete
}

# Package the Python bindings: install the wheel found under python/dist into
# the package root, then strip tests, C/C++/Cython sources and headers, and
# any directories left empty by that clean-up.
#
# Reads: srcdir, pkgbase, pkgver, pkgdir
# Sets:  pkgdesc, depends, provides, conflicts
package_python-apache-arrow() {
    pkgdesc="Language-independent columnar memory format for flat and hierarchical data (Python interface)"
    depends=('arrow' 'python' 'python-numpy')
    provides=('python-pyarrow')
    conflicts=('python-pyarrow')

    local install_root="$pkgdir/usr"
    local -a pip_flags=(--ignore-installed --isolated --no-deps --root "$pkgdir")
    # Case-insensitive file-name patterns that must not ship in the package.
    local -a unwanted=('*.cc' '*.cpp' '*.h' '*.hpp' '*.pxd' '*.pxi' '*.pyx')
    local pattern

    cd "$srcdir/arrow-$pkgbase-$pkgver/python"
    # PIP_CONFIG_FILE=/dev/null keeps any pip configuration file out of the
    # install.
    PIP_CONFIG_FILE=/dev/null pip install "${pip_flags[@]}" dist/pyarrow*.whl

    # Remove the bundled test suite first, then the source/header files.
    find "$install_root" -path '*/pyarrow*/tests*' -delete
    for pattern in "${unwanted[@]}"; do
        find "$install_root" -type f -iname "$pattern" -delete
    done
    # Finally prune directories emptied by the deletions above.
    find "$install_root" -type d -empty -delete
}