blob: c78d53eb14870780634acf7621135825b3806293 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
# Maintainer: Henry-ZHR <henry-zhr@qq.com>

# Split package: the C++ library plus its Python bindings, built from one checkout.
_name=sentencepiece
pkgbase="${_name}"
pkgname=(
  "${pkgbase}"
  "python-${pkgbase}"
)
pkgver=0.2.0
pkgrel=2
pkgdesc="Unsupervised text tokenizer for Neural Network-based text generation"
arch=('x86_64')
url="https://github.com/google/sentencepiece"
license=('Apache-2.0')

# Build-time requirements, grouped as: tooling, C++ libraries, Python packaging.
makedepends=(
  'git'
  'cmake'
  'abseil-cpp'
  'gperftools'
  'protobuf'
  'python'
  'python-build'
  'python-setuptools'
  'python-wheel'
  'python-installer'
)
checkdepends=('python-pytest')

# Pinned revision; obtained with: git rev-parse "v${pkgver}"
_tag='17d7580d6407802f85855d2cc9190634e2c95624'
source=("${_name}::git+${url}.git#tag=${_tag}")
# The git source is not checksummed.
sha512sums=('SKIP')
# Print the package version derived from the checked-out source.
#
# `git describe --tags` prints either an exact tag ("v0.2.0") or, when HEAD is
# past the nearest tag, "v0.2.0-<count>-g<hash>". The sed script:
#   1. strips the leading "v",
#   2. prefixes the commit count with "r" (so it reads as a revision),
#   3. turns the remaining hyphens into dots, because a pkgver may not
#      contain hyphens.
# For an exact tag the output is unchanged from the plain "strip the v" form.
pkgver() {
  git -C "${_name}" describe --tags | sed 's/^v//;s/\([^-]*-g\)/r\1/;s/-/./g'
}
# Reset the checkout to a pristine state and patch the Python build script.
prepare() {
  cd "${_name}"

  # Drop every untracked and ignored file left over from earlier runs.
  git clean -dfx

  # Use shared libs for python module: rewrite both static-archive names
  # in python/setup.py to their shared-object counterparts in a single pass.
  sed -i \
    -e 's/libsentencepiece.a/libsentencepiece.so/g' \
    -e 's/libsentencepiece_train.a/libsentencepiece_train.so/g' \
    python/setup.py
}
# Configure and compile the C++ project, stage an install tree under
# build/root, then build the Python wheel from the python/ subdirectory.
build() {
  cd "${_name}"

  # Configure: install prefix /usr, tests built, tcmalloc and shared libraries
  # enabled, protobuf and abseil taken from the system packages.
  cmake -S . -B build \
    -DCMAKE_INSTALL_PREFIX=/usr \
    -DSPM_BUILD_TEST=ON \
    -DSPM_ENABLE_TCMALLOC=ON \
    -DSPM_ENABLE_SHARED=ON \
    -DSPM_PROTOBUF_PROVIDER=package \
    -DSPM_ABSL_PROVIDER=package \
    -Wno-dev
  cmake --build build --parallel $(nproc)

  # Stage a private copy of the install tree (prefix "/") in build/root.
  # check() puts build/root/lib on LD_LIBRARY_PATH; presumably the Python
  # build below also picks the libraries up from here -- TODO confirm
  # against python/setup.py.
  #
  # -p: do not abort when the directory survives from a previous run
  #     (e.g. a rebuild where prepare() and its `git clean` were skipped).
  # Absolute DESTDIR: avoids depending on how the install script resolves
  #     a relative staging path.
  mkdir -p build/root
  DESTDIR="${PWD}/build/root" cmake --install build --prefix /

  # Build the wheel against the already-installed build dependencies.
  cd python
  python -m build --wheel --no-isolation
}
# Run the C++ and Python test suites on a best-effort basis.
check() {
  cd "${_name}"

  # Both tests are expected to fail
  # (maybe https://github.com/google/sentencepiece/issues/966 ?),
  # so a non-zero exit status is deliberately ignored.
  ctest --test-dir build --output-on-failure || true

  # The subshell keeps the directory change and the exported variables
  # from leaking into the rest of the function.
  (
    cd python

    # Interpreter major+minor digits joined together, used in the
    # setuptools build directory name below.
    local py_tag=$(python -c 'import sys; print("".join(map(str, sys.version_info[:2])))')

    # Let the loader find the shared libraries staged in build/root,
    # appended after any LD_LIBRARY_PATH already present.
    export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${srcdir}/${_name}/build/root/lib"
    # Import the module from the in-tree build output.
    export PYTHONPATH="${PWD}/build/lib.linux-${CARCH}-cpython-${py_tag}"

    # Failures here are tolerated as well.
    pytest test/ || true
  )
}
# Package the C++ library: install the CMake build tree into ${pkgdir}
# using the prefix chosen at configure time.
package_sentencepiece() {
  depends=(
    'gcc-libs'
    'glibc'
    'abseil-cpp'
    'gperftools'
    'protobuf'
  )
  # Shared objects this package advertises to dependents.
  provides=(
    'libsentencepiece.so'
    'libsentencepiece_train.so'
  )

  cd "${_name}"
  DESTDIR="${pkgdir}" cmake --install build
}
# Package the Python bindings: install the wheel produced in build()
# into ${pkgdir}.
package_python-sentencepiece() {
  pkgdesc="Python wrapper for SentencePiece"
  depends=(
    "${pkgbase}"
    'python'
  )
  # NOTE(review): this entry has no "pkg: reason" description -- consider
  # adding one once the exact feature that needs it is confirmed.
  optdepends=('python-protobuf')

  python -m installer --destdir="${pkgdir}" "${_name}/python/dist/"*.whl
}
|