summarylogtreecommitdiffstats
path: root/PKGBUILD
blob: 75214faf78408881ad2ec9a55f2ae2d5184f549c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# Package identity. _pkgname is the directory name of the git checkout under
# $srcdir; the packaging functions cd into "$srcdir/$_pkgname".
pkgname=python-vllm-rocm-git
_pkgname=vllm
# Rewritten by pkgver() from `git describe` on every build.
pkgver=0.19.1rc0.r60.gf186cfe
pkgrel=1
pkgdesc="high-throughput and memory-efficient inference and serving engine for LLMs (ROCm support)"
arch=('x86_64')
url='https://github.com/vllm-project/vllm'
license=(Apache-2.0)
provides=(python-vllm-rocm vllm)
conflicts=(python-vllm-rocm python-vllm-cuda)

# Runtime dependencies.
depends=(
  amdsmi
  numactl
  python-aiohttp
  python-blake3
  python-cachetools
  python-cbor2
  python-cloudpickle
  python-compressed-tensors
  python-diskcache
  python-einops
  python-fastapi
  python-gguf
  python-huggingface-hub
  python-ijson
  python-importlib-metadata
  python-jmespath
  python-mistral-common
  python-model-hosting-container-standards
  python-msgpack
  python-msgspec
  python-openai
  python-openai-harmony
  python-opencv
  python-partial-json-parser
  python-pillow
  python-prometheus-fastapi-instrumentator
  python-psutil
  python-py-cpuinfo
  python-pybase64
  python-pycountry
  python-pydantic
  python-pytorch-opt-rocm
  python-pyzmq
  python-seaborn
  python-setproctitle
  python-soundfile
  python-sphinx
  python-starlette
  python-sympy
  python-tiktoken
  python-torchvision
  python-tqdm
  python-transformers
  python-triton
  python-typing_extensions
  python-uvloop
  python-watchfiles
  rocblas
  uvicorn
)

# Build-time only dependencies.
makedepends=(
  git
  gcc
  cmake
  python-installer
  python-setuptools
  python-setuptools-scm
  python-build
  # build() runs rocminfo to detect the local GPU architecture
  rocminfo
)

# Optional features, in "package: description" form.
optdepends=(
  'python-prometheus_client: Prometheus instrumentation library for Python applications'
  'python-outlines: guided text generation'
  'python-lark: parsing toolkit'
  'python-torchaudio: required for image processor of minicpm-o-2.6'
  'python-datasets: tools to benchmark scripts'
  # Not currently in the AUR (presumably refers to the entries below - TODO confirm)
  'python-xgrammar: flexible structured generation'
  'python-depyf: required for debugging and profiling with compilation config'
  'python-lm-format-enforcer: required for JSON/REGEX llm output'
)

# VCS source: no checksum is possible for a moving git checkout, hence SKIP.
source=("git+https://github.com/vllm-project/vllm.git")
sha256sums=('SKIP')
# Keep makepkg from adding LTO flags to the build environment.
options=('!lto')

# Print the package version derived from `git describe` of the checkout.
# Example: v0.19.1rc0-60-gf186cfe -> 0.19.1rc0.r60.gf186cfe
# Output goes to stdout without a trailing newline.
pkgver() {
  # 1) prefix the commit count with "r", 2) turn dashes into dots,
  # 3) drop a leading "v" from the tag.
  local version_filter='s/\([^-]*-g\)/r\1/;s/-/./g;s/^v//'

  cd "${srcdir}/${_pkgname}"
  printf '%s' "$(git describe --long --tags --abbrev=7 | sed "${version_filter}")"
}

# Prepare the checkout for building: empty dist/ and patch CMakeLists.txt.
prepare() {
  local cmake_file="CMakeLists.txt"

  cd "${srcdir}/${_pkgname}"

  # Remove whatever is left in dist/; build() writes the wheel there and
  # package() installs dist/*.whl.
  rm -f dist/*

  # Relax the Python version restriction: append "3.14" to the
  # PYTHON_SUPPORTED_VERSIONS list ("&" re-inserts the matched text).
  sed -i 's/PYTHON_SUPPORTED_VERSIONS\s*"3.10" "3.11" "3.12" "3.13"/& "3.14"/' "${cmake_file}"
}

# Build the vLLM wheel for ROCm into dist/ with `python setup.py bdist_wheel`.
#
# Globals read:     srcdir, _pkgname, ROCM_PATH, CI, MAX_JOBS,
#                   CFLAGS, CXXFLAGS, LDFLAGS, PYTORCH_ROCM_ARCH
# Globals exported: MAX_JOBS, the VLLM_*/TORCH_*/FLASH_* switches below,
#                   CC, CXX, CFLAGS, CXXFLAGS, LDFLAGS,
#                   CMAKE_INTERPROCEDURAL_OPTIMIZATION, PYTORCH_ROCM_ARCH
build() {
  cd "$srcdir/$_pkgname"

  # ROCM_PATH unset/empty: source the login profile (presumably so that
  # profile.d scripts can provide the ROCm environment).
  if [[ -z "${ROCM_PATH}" ]]; then
    source /etc/profile
  fi

  # Honour a caller-supplied MAX_JOBS; default to one job per CPU.
  export MAX_JOBS="${MAX_JOBS:-$(nproc)}"
  export VLLM_TARGET_DEVICE=rocm
  export TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
  export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
  export VLLM_USE_MMAP=0
  export VLLM_ROCM_USE_AITER=0
  export VLLM_ROCM_USE_AITER_MOE=0
  export VLLM_USE_TRITON_AWQ=1

  export CC="/opt/rocm/llvm/bin/clang"
  export CXX="/opt/rocm/llvm/bin/clang++"

  # Disable LTO completely: drop every word starting with -flto (-flto,
  # -flto=auto, -flto=thin, ...) as a whole. Removing the bare "-flto"
  # substring first would turn "-flto=auto" into a stray "=auto" argument.
  local _var _flag
  local -a _words _kept
  for _var in CFLAGS CXXFLAGS LDFLAGS; do
    _kept=()
    # read -a splits on blanks without glob-expanding the words; newlines
    # are folded to spaces first so multi-line values are fully read.
    read -r -a _words <<< "${!_var//$'\n'/ }"
    for _flag in "${_words[@]}"; do
      [[ "$_flag" == -flto* ]] || _kept+=("$_flag")
    done
    export "${_var}=${_kept[*]}"
  done
  export CMAKE_INTERPROCEDURAL_OPTIMIZATION=FALSE

  # Use default symbol visibility.
  # -Wno-macro-redefined silences header macro redefinition warnings.
  export CXXFLAGS="$CXXFLAGS -fvisibility=default -Wno-macro-redefined"
  export CFLAGS="$CFLAGS -fvisibility=default -Wno-macro-redefined"

  # Check whether we are building in a CI environment.
  if [[ -n "$CI" && "$CI" != 0 ]]; then
    # https://llvm.org/docs/AMDGPUUsage.html
    # gfx906: MI 50/60, Radeon VII
    # gfx101x: RX 5000 Series
    # gfx103x: RX 6000 Series
    # gfx110x: RX 7000 Series
    # gfx1151: Strix Halo
    # gfx120x: RX 9000 Series
    PYTORCH_ROCM_ARCH="gfx906;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201"
  else
    # Local build: target only the GPUs present on this machine.
    # The character class includes hex letters so that names such as
    # gfx90a are captured in full rather than truncated to gfx90.
    local _detected_archs
    _detected_archs=$(rocminfo | grep -oP 'Name:\s+\Kgfx[0-9a-f]+' | sort -u | tr '\n' ';' | sed 's/;$//')
    if [[ -n "$_detected_archs" ]]; then
      PYTORCH_ROCM_ARCH="$_detected_archs"
    else
      # Do not clobber a value from the environment with an empty string.
      msg2 "rocminfo reported no gfx architecture; keeping PYTORCH_ROCM_ARCH='${PYTORCH_ROCM_ARCH}'"
    fi
  fi
  # Must be exported, otherwise the python child process never sees it.
  export PYTORCH_ROCM_ARCH
  msg2 "Building for ROCM=$PYTORCH_ROCM_ARCH"
  # Build
  python setup.py bdist_wheel --dist-dir=dist
  # python -m build --wheel --no-isolation # this does not work currently
}

# Install the built wheel into $pkgdir and ship a profile.d script that
# exports the Triton/ROCm related environment variables.
package() {
  local profile_script="${pkgdir}/etc/profile.d/vllm-triton.sh"

  cd "${srcdir}/${_pkgname}"
  python -m installer --destdir="${pkgdir}" dist/*.whl

  # Configure the triton environment variables: create the script with mode
  # 755 (and its parent directories), then fill in its contents.
  install -Dm755 /dev/null "${profile_script}"
  printf '%s\n' \
    'export TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1' \
    'export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"' \
    'export VLLM_TARGET_DEVICE=rocm' \
    'export VLLM_USE_MMAP=0' \
    'export VLLM_ROCM_USE_AITER=0' \
    'export VLLM_ROCM_USE_AITER_MOE=0' \
    'export VLLM_USE_TRITON_AWQ=1' \
    > "${profile_script}"
}