author | Felix Erkinger | 2024-06-04 15:14:26 +0200
---|---|---
committer | Felix Erkinger | 2024-06-04 15:14:26 +0200
commit | 21c8c0c47f8601d34fab87ce98da60146d225829 |
tree | 2a1db90baf6514acdcc0e68eadf7752fd02d6de5 |
parent | cf43ecad36ee21e6b0c03d32dda80959a372a54f |
download | aur-21c8c0c47f8601d34fab87ce98da60146d225829.tar.gz |
WIP: hipblas, upgpkg: localai-git 2.16.0.76.g34ab442c-2
-rw-r--r-- | .SRCINFO | 6
-rw-r--r-- | 2485-hipblas.patch | 128
-rw-r--r-- | PKGBUILD | 26
3 files changed, 154 insertions, 6 deletions
diff --git a/.SRCINFO b/.SRCINFO
@@ -1,6 +1,6 @@
 pkgbase = localai-git
 	pkgdesc = Self-hosted OpenAI API alternative - Open Source, community-driven and local-first.
-	pkgver = 2.16.0.74.g6ef78ef7
+	pkgver = 2.16.0.76.g34ab442c
 	pkgrel = 2
 	url = https://github.com/mudler/LocalAI
 	arch = x86_64
@@ -34,12 +34,13 @@ pkgbase = localai-git
 	depends = python-torchaudio
 	depends = python-torchvision
 	provides = localai
-	provides = local-ai=2.16.0.74.g6ef78ef7
+	provides = local-ai=2.16.0.76.g34ab442c
 	conflicts = localai
 	conflicts = local-ai
 	backup = etc/localai/localai.conf
 	source = localai::git+https://github.com/mudler/LocalAI
 	source = libbackend.patch
+	source = 2485-hipblas.patch
 	source = README.md
 	source = localai.conf
 	source = localai.service
@@ -52,6 +53,7 @@ pkgbase = localai-git
 	sha256sums = SKIP
 	sha256sums = SKIP
 	sha256sums = SKIP
+	sha256sums = SKIP
 
 pkgname = localai-git
 	depends = protobuf
diff --git a/2485-hipblas.patch b/2485-hipblas.patch
new file mode 100644
index 000000000000..9695121d9d98
--- /dev/null
+++ b/2485-hipblas.patch
@@ -0,0 +1,128 @@
+From 317875873ef6b14316c87d011185577172be65bd Mon Sep 17 00:00:00 2001
+From: Ettore Di Giacinto <mudler@localai.io>
+Date: Tue, 4 Jun 2024 13:02:22 +0200
+Subject: [PATCH] feat(amdgpu): try to build in single binary
+
+Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
+---
+ .github/workflows/release.yaml | 10 ++++++++++
+ Makefile                       |  8 ++++++++
+ pkg/model/initializers.go      | 20 ++++++++++++++++++--
+ 3 files changed, 36 insertions(+), 2 deletions(-)
+
+diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
+index 618c81a39af..f9e734c0f5a 100644
+--- a/.github/workflows/release.yaml
++++ b/.github/workflows/release.yaml
+@@ -38,6 +38,15 @@ jobs:
+           sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
+         env:
+           CUDA_VERSION: 12-3
++      - name: "Install Hipblas"
++        run: |
++          sudo apt-get update && \
++          sudo apt-get install -y --no-install-recommends \
++            hipblas-dev \
++            rocblas-dev && \
++          sudo apt-get clean && \
++          sudo rm -rf /var/lib/apt/lists/* && \
++          sudo ldconfig
+       - name: Cache grpc
+         id: cache-grpc
+         uses: actions/cache@v4
+@@ -61,6 +70,7 @@ jobs:
+           go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
+           export PATH=$PATH:$GOPATH/bin
+           export PATH=/usr/local/cuda/bin:$PATH
++          export PATH=/opt/rocm/bin:$PATH
+           GO_TAGS=p2p make dist
+       - uses: actions/upload-artifact@v4
+         with:
+diff --git a/Makefile b/Makefile
+index f2c03086662..c0abfc2ae80 100644
+--- a/Makefile
++++ b/Makefile
+@@ -327,6 +327,7 @@ ifeq ($(OS),Darwin)
+ 	$(info ${GREEN}I Skip CUDA build on MacOS${RESET})
+ else
+ 	$(MAKE) backend-assets/grpc/llama-cpp-cuda
++	$(MAKE) backend-assets/grpc/llama-cpp-hipblas
+ endif
+ 	$(MAKE) build
+ 	mkdir -p release
+@@ -712,6 +713,13 @@ backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
+ 	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
+ 	cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
+ 
++backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
++	cp -rf backend/cpp/llama backend/cpp/llama-hipblas
++	$(MAKE) -C backend/cpp/llama-hipblas purge
++	$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
++	BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
++	cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
++
+ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
+ 	cp -rf backend/cpp/llama backend/cpp/llama-grpc
+ 	$(MAKE) -C backend/cpp/llama-grpc purge
+diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
+index d013740ce5d..e9001f0a968 100644
+--- a/pkg/model/initializers.go
++++ b/pkg/model/initializers.go
+@@ -37,6 +37,7 @@ const (
+ 	LLamaCPPAVX      = "llama-cpp-avx"
+ 	LLamaCPPFallback = "llama-cpp-fallback"
+ 	LLamaCPPCUDA     = "llama-cpp-cuda"
++	LLamaCPPHipblas  = "llama-cpp-hipblas"
+ 	LLamaCPPGRPC     = "llama-cpp-grpc"
+ 
+ 	Gpt4AllLlamaBackend = "gpt4all-llama"
+@@ -93,7 +94,7 @@ ENTRY:
+ 	if autoDetect {
+ 		// if we find the llama.cpp variants, show them of as a single backend (llama-cpp) as later we are going to pick that up
+ 		// when starting the service
+-		foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda := false, false, false, false, false
++		foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda, foundLCPPHipblas := false, false, false, false, false, false
+ 		if _, ok := backends[LLamaCPP]; !ok {
+ 			for _, e := range entry {
+ 				if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 {
+@@ -116,6 +117,10 @@ ENTRY:
+ 					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPCUDA)
+ 					foundLCPPCuda = true
+ 				}
++				if strings.Contains(e.Name(), LLamaCPPHipblas) && !foundLCPPHipblas {
++					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPHipblas)
++					foundLCPPHipblas = true
++				}
+ 			}
+ 		}
+ 	}
+@@ -169,6 +174,7 @@ ENTRY:
+ // selectGRPCProcess selects the GRPC process to start based on system capabilities
+ func selectGRPCProcess(backend, assetDir string) string {
+ 	foundCUDA := false
++	foundAMDGPU := false
+ 	var grpcProcess string
+ 
+ 	// Select backend now just for llama.cpp
+@@ -195,10 +201,20 @@ func selectGRPCProcess(backend, assetDir string) string {
+ 					log.Info().Msgf("GPU device found but no CUDA backend present")
+ 				}
+ 			}
++			if strings.Contains(gpu.String(), "amd") {
++				p := backendPath(assetDir, LLamaCPPHipblas)
++				if _, err := os.Stat(p); err == nil {
++					log.Info().Msgf("[%s] attempting to load with HIPBLAS variant", backend)
++					grpcProcess = p
++					foundAMDGPU = true
++				} else {
++					log.Info().Msgf("GPU device found but no HIPBLAS backend present")
++				}
++			}
+ 		}
+ 	}
+ 
+-	if foundCUDA {
++	if foundCUDA || foundAMDGPU {
+ 		return grpcProcess
+ 	}
+ 
diff --git a/PKGBUILD b/PKGBUILD
@@ -7,6 +7,7 @@ _ENABLE_ROCM=${_ENABLE_ROCM:-1}
 
 # additional backends if set to 1
 _ENABLE_PYTHON=${_ENABLE_PYTHON:-1}
+# piper build is currently broken, disable it
 _ENABLE_PIPER=${_ENABLE_PIPER:-0}
 
 # if GPU_TARGETS and AMDGPU_TARGETS are not set, mirror architecture list from arch:python-pytorch@2.3.0-2
@@ -35,8 +36,7 @@ else
 fi
 
 # enabled backends
-_GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2 \
-backend-assets/grpc/whisper \
+_GRPC_BACKENDS="backend-assets/grpc/whisper \
 backend-assets/grpc/local-store \
 $_OPTIONAL_GRPC"
 # disabled backends: backend-assets/util/llama-cpp-rpc-server llama-cpp-grpc llama-ggml gpt4all rwkv tinydream bert-embeddings huggingface stablediffusion
@@ -44,7 +44,7 @@ $_OPTIONAL_GRPC"
 _pkgbase="localai"
 pkgbase="${_pkgbase}-git"
 pkgname=()
-pkgver=2.16.0.74.g6ef78ef7
+pkgver=2.16.0.76.g34ab442c
 pkgrel=2
 pkgdesc="Self-hosted OpenAI API alternative - Open Source, community-driven and local-first."
 url="https://github.com/mudler/LocalAI"
@@ -57,6 +57,7 @@ backup=("etc/${_pkgbase}/${_pkgbase}.conf")
 source=(
     "${_pkgbase}"::"git+https://github.com/mudler/LocalAI"
     "libbackend.patch"
+    "2485-hipblas.patch"
     "README.md"
     "${_pkgbase}.conf"
     "${_pkgbase}.service"
@@ -72,6 +73,7 @@ sha256sums=(
     'SKIP'
     'SKIP'
     'SKIP'
+    'SKIP'
 )
 
 depends=(
@@ -181,6 +183,9 @@ EOF
     # modify python backend build library to use --system-site-packages, and dont reinstall torch*
     patch -N -i "${srcdir}/libbackend.patch" -p1
 
+    # modify source from PR2485, adds hipblas llama version
+    patch -N -i "${srcdir}/2485-hipblas.patch" -p1
+
     if [[ $_ENABLE_PIPER = 1 ]]; then
         # fix piper build
         mkdir -p "sources/go-piper/piper-phonemize/pi/lib"
@@ -217,8 +222,21 @@ _build() {
     mkdir -p backend-assets
     cp -a backend/python backend-assets/python
   fi
+  if test "$1" = "cublas"; then
+    _LLAMA_CPP_BACKEND="backend-assets/grpc/llama-cpp-cuda"
+  elif test "$1" = "hipblas"; then
+    _LLAMA_CPP_BACKEND="backend-assets/grpc/llama-cpp-hipblas"
+  else
+    _LLAMA_CPP_BACKEND="backend-assets/grpc/llama-cpp-avx2"
+  fi
+  cat - << EOF
+
+BUILD: $1, GO_TAGS=$_GO_TAGS, OPTIONAL_MAKE_ARGS=$_OPTIONAL_MAKE_ARGS
+LLAMA_BACKEND: $_LLAMA_CPP_BACKEND
+OTHER_GRPC_BACKENDS: $_GRPC_BACKENDS
 
-  make -j"$(nproc)" BUILD_TYPE="$1" GRPC_BACKENDS="$_GRPC_BACKENDS" \
+EOF
+  make -j"$(nproc)" BUILD_TYPE="$1" GRPC_BACKENDS="$_LLAMA_CPP_BACKEND $_GRPC_BACKENDS" \
     GO_TAGS="$_GO_TAGS" $_OPTIONAL_MAKE_ARGS build
 }