# Configuration file for the LocalAI service (localai.conf)

## Specify a different bind address (defaults to ":8080")
LOCALAI_ADDRESS=:55080
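## For example, with the address above the OpenAI-compatible API should be
## reachable on port 55080 (assuming the service is running locally):
##   curl http://localhost:55080/v1/models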

## Default models context size
LOCALAI_CONTEXT_SIZE=4096

## Define galleries.
## Models available to install will be visible in `/models/available`
LOCALAI_GALLERIES=[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]
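## Additional galleries can be appended to the JSON array; for example, a
## hypothetical extra gallery (name and URL below are placeholders only):
# LOCALAI_GALLERIES=[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}, {"name":"my-gallery", "url":"https://example.com/index.yaml"}]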

# Enable GPU acceleration
F16=true

## Set the number of threads used for parallel computation.
## Defaults to the number of physical cores (since bdd6769); using the physical core count is suggested.
# LOCALAI_THREADS=8

## CORS settings
# LOCALAI_CORS=true
# LOCALAI_CORS_ALLOW_ORIGINS=*
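## For example, to allow only specific origins instead of all (placeholder
## domains; the value is assumed to be a comma-separated list):
# LOCALAI_CORS_ALLOW_ORIGINS=https://chat.example.com,https://admin.example.com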

## Enable debug mode
# LOCALAI_LOG_LEVEL=debug

## Disables COMPEL (Diffusers)
# COMPEL=0

## Enable/Disable single backend (useful if only one GPU is available)
# LOCALAI_SINGLE_ACTIVE_BACKEND=true

## Specify a default upload limit in MB (whisper)
LOCALAI_UPLOAD_LIMIT=30

## List of external GRPC backends (note: on the container image this variable is already set to use the extra backends available in extra/)
# LOCALAI_EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py

### Advanced settings ###
### These are not used by LocalAI itself, but by other components in the stack ###
##
### Preload libraries
# LD_PRELOAD=

### Huggingface cache for models
# HUGGINGFACE_HUB_CACHE=/usr/local/huggingface

### Python backends GRPC max workers
### Default number of workers for GRPC Python backends.
### This controls whether a backend can process multiple requests concurrently.
# PYTHON_GRPC_MAX_WORKERS=1

### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
# LLAMACPP_PARALLEL=1

### Define a list of GRPC servers for llama.cpp workers to distribute the load across
# https://github.com/ggerganov/llama.cpp/pull/6829
# https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md
# LLAMACPP_GRPC_SERVERS=""
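### For example, pointing at two llama.cpp rpc-server instances (placeholder
### addresses; format assumed to be comma-separated host:port pairs):
# LLAMACPP_GRPC_SERVERS="192.168.1.10:50052,192.168.1.11:50052"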

### Enable to run parallel requests
# LOCALAI_PARALLEL_REQUESTS=true

### Watchdog settings
###
# Enables the watchdog to kill backends that have been idle for too long
LOCALAI_WATCHDOG_IDLE=true
#
# Time in duration format (e.g. 1h30m) after which a backend is considered idle
LOCALAI_WATCHDOG_IDLE_TIMEOUT=45m
#
# Enables the watchdog to kill backends that have been busy for too long
LOCALAI_WATCHDOG_BUSY=true
#
# Time in duration format (e.g. 1h30m) after which a busy backend is considered stuck
LOCALAI_WATCHDOG_BUSY_TIMEOUT=15m