author     Felix Erkinger  2024-06-04 15:14:26 +0200
committer  Felix Erkinger  2024-06-04 15:14:26 +0200
commit     21c8c0c47f8601d34fab87ce98da60146d225829 (patch)
tree       2a1db90baf6514acdcc0e68eadf7752fd02d6de5
parent     cf43ecad36ee21e6b0c03d32dda80959a372a54f (diff)
download   aur-21c8c0c47f8601d34fab87ce98da60146d225829.tar.gz
WIP: hipblas, upgpkg: localai-git 2.16.0.76.g34ab442c-2
-rw-r--r--   .SRCINFO             |   6
-rw-r--r--   2485-hipblas.patch   | 128
-rw-r--r--   PKGBUILD             |  26
3 files changed, 154 insertions, 6 deletions
diff --git a/.SRCINFO b/.SRCINFO
index 0c3ba38d400a..4fa243067695 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,6 +1,6 @@
pkgbase = localai-git
pkgdesc = Self-hosted OpenAI API alternative - Open Source, community-driven and local-first.
- pkgver = 2.16.0.74.g6ef78ef7
+ pkgver = 2.16.0.76.g34ab442c
pkgrel = 2
url = https://github.com/mudler/LocalAI
arch = x86_64
@@ -34,12 +34,13 @@ pkgbase = localai-git
depends = python-torchaudio
depends = python-torchvision
provides = localai
- provides = local-ai=2.16.0.74.g6ef78ef7
+ provides = local-ai=2.16.0.76.g34ab442c
conflicts = localai
conflicts = local-ai
backup = etc/localai/localai.conf
source = localai::git+https://github.com/mudler/LocalAI
source = libbackend.patch
+ source = 2485-hipblas.patch
source = README.md
source = localai.conf
source = localai.service
@@ -52,6 +53,7 @@ pkgbase = localai-git
sha256sums = SKIP
sha256sums = SKIP
sha256sums = SKIP
+ sha256sums = SKIP
pkgname = localai-git
depends = protobuf
diff --git a/2485-hipblas.patch b/2485-hipblas.patch
new file mode 100644
index 000000000000..9695121d9d98
--- /dev/null
+++ b/2485-hipblas.patch
@@ -0,0 +1,128 @@
+From 317875873ef6b14316c87d011185577172be65bd Mon Sep 17 00:00:00 2001
+From: Ettore Di Giacinto <mudler@localai.io>
+Date: Tue, 4 Jun 2024 13:02:22 +0200
+Subject: [PATCH] feat(amdgpu): try to build in single binary
+
+Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
+---
+ .github/workflows/release.yaml | 10 ++++++++++
+ Makefile | 8 ++++++++
+ pkg/model/initializers.go | 20 ++++++++++++++++++--
+ 3 files changed, 36 insertions(+), 2 deletions(-)
+
+diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
+index 618c81a39af..f9e734c0f5a 100644
+--- a/.github/workflows/release.yaml
++++ b/.github/workflows/release.yaml
+@@ -38,6 +38,15 @@ jobs:
+ sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
+ env:
+ CUDA_VERSION: 12-3
++ - name: "Install Hipblas"
++ run: |
++ sudo apt-get update && \
++ sudo apt-get install -y --no-install-recommends \
++ hipblas-dev \
++ rocblas-dev && \
++ sudo apt-get clean && \
++ sudo rm -rf /var/lib/apt/lists/* && \
++ sudo ldconfig
+ - name: Cache grpc
+ id: cache-grpc
+ uses: actions/cache@v4
+@@ -61,6 +70,7 @@ jobs:
+ go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
+ export PATH=$PATH:$GOPATH/bin
+ export PATH=/usr/local/cuda/bin:$PATH
++ export PATH=/opt/rocm/bin:$PATH
+ GO_TAGS=p2p make dist
+ - uses: actions/upload-artifact@v4
+ with:
+diff --git a/Makefile b/Makefile
+index f2c03086662..c0abfc2ae80 100644
+--- a/Makefile
++++ b/Makefile
+@@ -327,6 +327,7 @@ ifeq ($(OS),Darwin)
+ $(info ${GREEN}I Skip CUDA build on MacOS${RESET})
+ else
+ $(MAKE) backend-assets/grpc/llama-cpp-cuda
++ $(MAKE) backend-assets/grpc/llama-cpp-hipblas
+ endif
+ $(MAKE) build
+ mkdir -p release
+@@ -712,6 +713,13 @@ backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
+ CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
+ cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
+
++backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
++ cp -rf backend/cpp/llama backend/cpp/llama-hipblas
++ $(MAKE) -C backend/cpp/llama-hipblas purge
++ $(info ${GREEN}I llama-cpp build info:hipblas${RESET})
++ BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
++ cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
++
+ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
+ cp -rf backend/cpp/llama backend/cpp/llama-grpc
+ $(MAKE) -C backend/cpp/llama-grpc purge
+diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
+index d013740ce5d..e9001f0a968 100644
+--- a/pkg/model/initializers.go
++++ b/pkg/model/initializers.go
+@@ -37,6 +37,7 @@ const (
+ LLamaCPPAVX = "llama-cpp-avx"
+ LLamaCPPFallback = "llama-cpp-fallback"
+ LLamaCPPCUDA = "llama-cpp-cuda"
++ LLamaCPPHipblas = "llama-cpp-hipblas"
+ LLamaCPPGRPC = "llama-cpp-grpc"
+
+ Gpt4AllLlamaBackend = "gpt4all-llama"
+@@ -93,7 +94,7 @@ ENTRY:
+ if autoDetect {
+ // if we find the llama.cpp variants, show them off as a single backend (llama-cpp) as later we are going to pick that up
+ // when starting the service
+- foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda := false, false, false, false, false
++ foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda, foundLCPPHipblas := false, false, false, false, false, false
+ if _, ok := backends[LLamaCPP]; !ok {
+ for _, e := range entry {
+ if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 {
+@@ -116,6 +117,10 @@ ENTRY:
+ backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPCUDA)
+ foundLCPPCuda = true
+ }
++ if strings.Contains(e.Name(), LLamaCPPHipblas) && !foundLCPPHipblas {
++ backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPHipblas)
++ foundLCPPHipblas = true
++ }
+ }
+ }
+ }
+@@ -169,6 +174,7 @@ ENTRY:
+ // selectGRPCProcess selects the GRPC process to start based on system capabilities
+ func selectGRPCProcess(backend, assetDir string) string {
+ foundCUDA := false
++ foundAMDGPU := false
+ var grpcProcess string
+
+ // Select backend now just for llama.cpp
+@@ -195,10 +201,20 @@ func selectGRPCProcess(backend, assetDir string) string {
+ log.Info().Msgf("GPU device found but no CUDA backend present")
+ }
+ }
++ if strings.Contains(gpu.String(), "amd") {
++ p := backendPath(assetDir, LLamaCPPHipblas)
++ if _, err := os.Stat(p); err == nil {
++ log.Info().Msgf("[%s] attempting to load with HIPBLAS variant", backend)
++ grpcProcess = p
++ foundAMDGPU = true
++ } else {
++ log.Info().Msgf("GPU device found but no HIPBLAS backend present")
++ }
++ }
+ }
+ }
+
+- if foundCUDA {
++ if foundCUDA || foundAMDGPU {
+ return grpcProcess
+ }
+
diff --git a/PKGBUILD b/PKGBUILD
index d6909ebec8cb..8ceba5487a16 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -7,6 +7,7 @@ _ENABLE_ROCM=${_ENABLE_ROCM:-1}
# additional backends if set to 1
_ENABLE_PYTHON=${_ENABLE_PYTHON:-1}
+# piper build is currently broken, disable it
_ENABLE_PIPER=${_ENABLE_PIPER:-0}
# if GPU_TARGETS and AMDGPU_TARGETS are not set, mirror architecture list from arch:python-pytorch@2.3.0-2
@@ -35,8 +36,7 @@ else
fi
# enabled backends
-_GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2 \
-backend-assets/grpc/whisper \
+_GRPC_BACKENDS="backend-assets/grpc/whisper \
backend-assets/grpc/local-store \
$_OPTIONAL_GRPC"
# disabled backends: backend-assets/util/llama-cpp-rpc-server llama-cpp-grpc llama-ggml gpt4all rwkv tinydream bert-embeddings huggingface stablediffusion
@@ -44,7 +44,7 @@ $_OPTIONAL_GRPC"
_pkgbase="localai"
pkgbase="${_pkgbase}-git"
pkgname=()
-pkgver=2.16.0.74.g6ef78ef7
+pkgver=2.16.0.76.g34ab442c
pkgrel=2
pkgdesc="Self-hosted OpenAI API alternative - Open Source, community-driven and local-first."
url="https://github.com/mudler/LocalAI"
@@ -57,6 +57,7 @@ backup=("etc/${_pkgbase}/${_pkgbase}.conf")
source=(
"${_pkgbase}"::"git+https://github.com/mudler/LocalAI"
"libbackend.patch"
+ "2485-hipblas.patch"
"README.md"
"${_pkgbase}.conf"
"${_pkgbase}.service"
@@ -72,6 +73,7 @@ sha256sums=(
'SKIP'
'SKIP'
'SKIP'
+ 'SKIP'
)
depends=(
@@ -181,6 +183,9 @@ EOF
# modify python backend build library to use --system-site-packages, and don't reinstall torch*
patch -N -i "${srcdir}/libbackend.patch" -p1
+ # apply patch from PR2485, which adds the hipblas llama.cpp variant
+ patch -N -i "${srcdir}/2485-hipblas.patch" -p1
+
if [[ $_ENABLE_PIPER = 1 ]]; then
# fix piper build
mkdir -p "sources/go-piper/piper-phonemize/pi/lib"
@@ -217,8 +222,21 @@ _build() {
mkdir -p backend-assets
cp -a backend/python backend-assets/python
fi
+ if test "$1" = "cublas"; then
+ _LLAMA_CPP_BACKEND="backend-assets/grpc/llama-cpp-cuda"
+ elif test "$1" = "hipblas"; then
+ _LLAMA_CPP_BACKEND="backend-assets/grpc/llama-cpp-hipblas"
+ else
+ _LLAMA_CPP_BACKEND="backend-assets/grpc/llama-cpp-avx2"
+ fi
+ cat - << EOF
+
+BUILD: $1, GO_TAGS=$_GO_TAGS, OPTIONAL_MAKE_ARGS=$_OPTIONAL_MAKE_ARGS
+LLAMA_BACKEND: $_LLAMA_CPP_BACKEND
+OTHER_GRPC_BACKENDS: $_GRPC_BACKENDS
+EOF
- make -j"$(nproc)" BUILD_TYPE="$1" GRPC_BACKENDS="$_GRPC_BACKENDS" \
+ make -j"$(nproc)" BUILD_TYPE="$1" GRPC_BACKENDS="$_LLAMA_CPP_BACKEND $_GRPC_BACKENDS" \
GO_TAGS="$_GO_TAGS" $_OPTIONAL_MAKE_ARGS build
}