# blob: 16877802cd3e8487505d600e45845f800a7e548e (plain)
# Maintainer: Daniel Bershatsky <bepshatsky@yandex.ru>
# Contributor: Filip Graliński <filipg@amu.edu.pl>
# Defaults for the Rust toolchain environment. Each variable is assigned only
# when it is unset or empty, so a value supplied by the caller always wins.
# The expansions are quoted so the resulting word is passed to ':' verbatim
# instead of being word-split and glob-expanded (relevant when SRCDEST
# contains spaces or glob characters).
: "${CARGO_HOME:=$SRCDEST/cargo-home}"
: "${CARGO_TARGET_DIR:=target}"
: "${RUSTUP_TOOLCHAIN:=stable}"
# --- Package identity -------------------------------------------------------
_module='tokenizers'
_pkgname="python-${_module}"
pkgname="${_pkgname}"
pkgver='0.21.0'
pkgrel='1'
pkgdesc="Fast State-of-the-Art Tokenizers optimized for Research and Production"
url='https://github.com/huggingface/tokenizers'
license=("Apache-2.0")
arch=("i686" "x86_64")

# --- Dependencies -----------------------------------------------------------
depends=("python")
makedepends=(
  "clang"
  "rust-bindgen"
  "python-build"
  "python-installer"
  "python-maturin"
  "python-setuptools-rust"
  "python-wheel"
)
# Only needed by check(). python-datasets comes from the AUR and pulls in
# python-huggingface-hub and python-multiprocess (both AUR as well).
checkdepends=(
  "python-datasets"
  "python-numpy"
  "python-pyarrow"
  "python-pytest"
  "python-requests"
  "python-setuptools-rust"
)

# makepkg build option: link-time optimisation is switched off for this package.
options=('!lto')

# --- Sources ----------------------------------------------------------------
# _pkgsrc is the directory name the functions below cd into; _pkgext is the
# archive extension used for both the local file name and the download URL.
_pkgsrc="${_module}-${pkgver}"
_pkgext='tar.gz'

# Entry order must stay in sync with sha256sums below.
source=(
  "${_pkgsrc}.${_pkgext}::${url}/archive/refs/tags/v${pkgver}.${_pkgext}"
  "norvig-big.txt::https://norvig.com/big.txt"
  "roberta.json::https://huggingface.co/roberta-large/raw/main/tokenizer.json"
)
sha256sums=(
  "841279ad797d575ed3cf31fc4f30e09e37acbd35028d30c51fc0879ef7ed4094"
  "fa066c7d40f0f201ac4144e652aa62430e58a6b3805ec70650f678da5804e87b"
  "847bbeab6174d66a88898f729d52fa8d355fafe1bea101cf960dd404581df70e"
)
# Export the cargo/rustup settings chosen at the top of the file and set
# GIT_DIR to '.' for every process started afterwards in the calling shell.
# NOTE(review): GIT_DIR='.' presumably keeps the tools from discovering an
# enclosing git checkout — confirm the intent before changing it.
_rust_env() {
  export GIT_DIR='.'
  export CARGO_HOME
  export CARGO_TARGET_DIR
  export RUSTUP_TOOLCHAIN
}
# Patch the Python bindings' manifest and download every crate ahead of the
# build. The body is a subshell, so the exports made by _rust_env and the
# directory change do not leak into the calling shell.
prepare() (
  _rust_env
  cd "$_pkgsrc/bindings/python"

  # Correct the misspelled "defaut" in Cargo.toml (first match on each line).
  sed -i -E 's@defaut@default@' Cargo.toml

  # Refresh the lock file, then fetch the locked crates for the host triple
  # that rustc reports, so build() can run with --frozen.
  cargo update
  local _host_triple
  _host_triple="$(rustc -vV | sed -n 's/host: //p')"
  cargo fetch --locked --target "$_host_triple"
)
# Compile the Rust crate in release mode, then produce the wheel in dist/.
# Runs in a subshell with the environment exported by _rust_env.
build() (
  _rust_env
  cd "$_pkgsrc/bindings/python"

  # --frozen: use the existing lock file and already-fetched crates only.
  cargo build --release --frozen

  # Build a wheel against the system site-packages (no isolated build env).
  python -m build --wheel --no-isolation
)
# Run the Python and Rust test suites against the freshly built wheel.
# Both suites are best effort: a failing run is shown in the log but does not
# abort the package build (the trailing '|| :' discards the exit status).
check() {
  cd "$_pkgsrc/bindings/python"

  # Stage the downloaded fixtures under data/ (presumably where the tests
  # look for them — confirm against the test suite). small.txt is the first
  # 100 lines of big.txt.
  install -Dm644 "$srcdir/roberta.json" "data/roberta.json"
  install -Dm644 "$srcdir/norvig-big.txt" "data/big.txt"
  head -n 100 data/big.txt > data/small.txt

  # Install the wheel into a throw-away venv that can still see the system
  # site-packages, and run pytest from there.
  python -m venv --system-site-packages test-env
  test-env/bin/python -m installer dist/*.whl
  test-env/bin/python -m pytest -s -v tests || :

  # Run the Rust tests with the same exported CARGO_HOME, CARGO_TARGET_DIR
  # and RUSTUP_TOOLCHAIN that prepare() and build() use; without the export
  # cargo would ignore the crates fetched earlier. The subshell keeps the
  # exports (including GIT_DIR) away from the pytest run above and from the
  # calling shell.
  (
    _rust_env
    cargo test --no-default-features
  ) || :
}
# Install the wheel produced by build() into the packaging root ($pkgdir).
package() {
  cd "$_pkgsrc/bindings/python"
  python -m installer --destdir "$pkgdir" dist/*.whl
}
|