blob: 79d87b9236d8c7c645e36e1ff9fb978f618ac424 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
# Maintainer: Henry-ZHR <henry-zhr@qq.com>
# Upstream project name; reused for pkgbase, the split package names and the
# directory the git source is checked out into.
_name=sentencepiece
pkgbase=${_name}
pkgname=(
  "${pkgbase}"
  "python-${pkgbase}"
)
pkgver=0.2.0
pkgrel=4
pkgdesc='Unsupervised text tokenizer for Neural Network-based text generation'
arch=(x86_64)
url='https://github.com/google/sentencepiece'
license=(Apache-2.0)

# Everything needed to build both split packages (C++ library and Python wheel).
makedepends=(
  git
  cmake
  abseil-cpp
  gperftools
  protobuf
  python
  python-build
  python-setuptools
  python-wheel
  python-installer
)
checkdepends=(python-pytest)

# Revision the release tag resolves to: git rev-parse "v${pkgver}"
_tag=17d7580d6407802f85855d2cc9190634e2c95624

# The git checkout plus two upstream commits fetched as patches; prepare()
# applies them on top of the pinned revision.
source=(
  "${_name}::git+${url}.git#tag=${_tag}"
  "fix-crash-in-unigram-model-training.patch::${url}/commit/d19ac45c919602cb041a86599d0593d24a150ac2.patch"
  "bump-cmake-minimum-required-version.patch::${url}/commit/e2127b9b932ba00811d5023c5ea69a12a857b244.patch"
)
# Same order as source=(): the git checkout is not checksummed (SKIP).
sha512sums=(
  SKIP
  644bc47fb3b90f2447ae9aac5ff2939fa6c9b3b0dc33550828b8517656f33fb1b41b2ebf9443e4b39a64bb963533c8d7a323b100d0b37671b070b7368f6fb1c7
  a4749510e7a4e5c72c60e67e903201d5f6b2224752059481613cb6e0e01c901d0bdbd83553ecc0b916f551e6f37342bab6bf298dfcdd5234129b1645299775b9
)
# Print the package version derived from the checked-out git revision.
#
# Output: the `git describe --tags` string with the leading "v" removed. When
# HEAD sits exactly on a tag this is the plain tag version (e.g. "0.2.0").
# When describe appends "-<count>-g<hash>", the suffix is rewritten to
# ".r<count>.g<hash>" because hyphens are not allowed in pkgver.
# Returns non-zero if `git describe` itself fails, instead of letting the
# exit status of a trailing `sed` hide the failure.
pkgver() {
  local described
  described="$(git -C "${_name}" describe --tags)" || return
  sed 's/^v//;s/\([^-]*-g\)/r\1/;s/-/./g' <<<"${described}"
}
# Reset the checkout to a pristine state, apply the downloaded upstream
# patches, and point python/setup.py at the shared libraries.
prepare() {
  cd "${_name}"

  # Drop every untracked and ignored file left over from an earlier run.
  git clean -dfx

  # Applied in order:
  #  1. https://github.com/google/sentencepiece/pull/1088 - should fix the
  #     test for v0.2.0.
  #  2. Fixes the build for CMake 4.0.0+.
  local patch_file
  for patch_file in \
    fix-crash-in-unigram-model-training.patch \
    bump-cmake-minimum-required-version.patch
  do
    git apply --verbose "../${patch_file}"
  done

  # The python module should use the shared libs rather than the static archives.
  sed -i \
    -e 's/libsentencepiece.a/libsentencepiece.so/g' \
    -e 's/libsentencepiece_train.a/libsentencepiece_train.so/g' \
    python/setup.py
}
# Configure and compile the C++ library, stage an install of it under
# build/root, then build the Python wheel.
build() {
  cd "${_name}"

  # Shared libraries, tcmalloc and the test suite are enabled; protobuf and
  # abseil are taken from the system packages instead of bundled copies.
  cmake -S . -B build \
    -DCMAKE_INSTALL_PREFIX=/usr \
    -DSPM_BUILD_TEST=ON \
    -DSPM_ENABLE_TCMALLOC=ON \
    -DSPM_ENABLE_SHARED=ON \
    -DSPM_PROTOBUF_PROVIDER=package \
    -DSPM_ABSL_PROVIDER=package \
    -Wno-dev
  cmake --build build --parallel "$(nproc)"

  # Stage a prefix-less install inside the build tree (build/root/lib, ...).
  # check() adds build/root/lib to LD_LIBRARY_PATH; presumably python/setup.py
  # also picks the library up from here - confirm against upstream setup.py.
  # -p keeps a repeated build() on an existing tree from failing, and the
  # absolute DESTDIR does not depend on the directory cmake runs its install
  # scripts from.
  mkdir -p build/root
  DESTDIR="${PWD}/build/root" cmake --install build --prefix /

  # Build the wheel against the already installed build dependencies.
  cd python
  python -m build --wheel --no-isolation
}
# Run the CTest suite from the CMake build tree, then the Python tests.
check() {
  cd "${_name}"
  ctest --test-dir build --output-on-failure

  # Subshell: the directory change and exported variables stay local to the
  # Python test run.
  (
    cd python

    # Major and minor version concatenated without a dot, e.g. "312".
    # Declared and assigned separately so a failing python call is not masked
    # by the exit status of `local`.
    local python_version
    python_version="$(python -c 'import sys; print("".join(map(str, sys.version_info[:2])))')" || exit 1

    # Import the extension from the build/lib.* directory under python/
    # (presumably left behind by the wheel build) and resolve the shared
    # libraries from the staged install in build/root/lib.
    export PYTHONPATH="${PWD}/build/lib.linux-${CARCH}-cpython-${python_version}"
    export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${srcdir}/${_name}/build/root/lib"
    pytest test/
  )
}
# Package the C++ library by installing the CMake build tree into ${pkgdir}.
package_sentencepiece() {
  depends=(
    'gcc-libs'
    'glibc'
    'abseil-cpp'
    'gperftools'
    'protobuf'
  )
  # Sonames advertised by this package.
  provides=(
    'libsentencepiece.so'
    'libsentencepiece_train.so'
  )

  cd "${_name}"
  DESTDIR="${pkgdir}" cmake --install build
}
# Package the Python bindings by installing the wheel produced in build().
package_python-sentencepiece() {
  pkgdesc="Python wrapper for SentencePiece"
  # Pinned to the exact version-release of the library package from this
  # same PKGBUILD.
  depends=(
    "${pkgbase}=${pkgver}-${pkgrel}"
    'gcc-libs'
    'glibc'
    'python'
  )
  optdepends=('python-protobuf')

  python -m installer --destdir="${pkgdir}" "${_name}"/python/dist/*.whl
}
|