blob: bbafd4e7fb8b8aedc8e4b60d1157e31ea8c3e092 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
|
[Unit]
# Template unit: %i expands to the instance name, so each instance gets its
# own description.
Description=SGLang LLM Server (%i)
# Order this unit after the network targets. After= only orders; the Wants=
# below is what actually pulls network-online.target into the transaction.
After=network.target
After=network-online.target
Wants=network-online.target

[Service]
# --- Process model, identity and state ---------------------------------------
# Type=simple: the process started by ExecStart= is the main service process.
Type=simple
User=sglang
Group=sglang
# StateDirectory=sglang refers to /var/lib/sglang; the same path is used below
# as working directory, HOME and HF_HOME.
StateDirectory=sglang
WorkingDirectory=/var/lib/sglang

# --- Environment --------------------------------------------------------------
Environment=HOME=/var/lib/sglang
Environment=HF_HOME=/var/lib/sglang
Environment=CUDA_HOME=/opt/cuda
# Pin nvcc's host compiler to gcc 15. nvcc 13.x cannot parse the new constructs
# in libstdc++ 16 (gcc 16 is Arch's default), and without this flag the
# flashinfer/sgl_kernel JIT extensions fail at build time.
# Remove this line once nvcc supports gcc 16 (likely CUDA 13.3+).
Environment=NVCC_PREPEND_FLAGS=--compiler-bindir=/usr/bin/gcc-15
# Optional override files: global ones first, then the per-instance (%i) ones.
# The leading "-" makes a missing file a non-error. Files are read in the
# order listed, so a later file overrides an earlier one.
# Presumably this is where SGLANG_OPTS / SGLANG_ARGS get defined -- confirm.
EnvironmentFile=-/etc/sglang/sglang.conf
EnvironmentFile=-/etc/sglang/sglang.env
EnvironmentFile=-/etc/sglang/%i.conf
EnvironmentFile=-/etc/sglang/%i.env

# --- Startup ------------------------------------------------------------------
# Some checkpoints ship an incomplete HF cache (e.g. text-only siblings of VL
# checkpoints that strip preprocessor_config.json); this helper injects the
# missing files. It is idempotent and does nothing when
# /usr/share/sglang/cache-fixups/ is empty. The "-" prefix keeps a failure of
# the helper from failing the unit.
ExecStartPre=-/usr/lib/sglang/cache-fixup
# The server listens on loopback only. $SGLANG_OPTS and $SGLANG_ARGS are
# unbraced on purpose: systemd splits their values on whitespace into separate
# arguments.
ExecStart=/usr/bin/python -m sglang.launch_server \
    --host 127.0.0.1 \
    --sleep-on-idle \
    $SGLANG_OPTS $SGLANG_ARGS
# No start timeout.
# NOTE(review): with Type=simple this presumably only affects how long
# ExecStartPre= may run -- confirm an unbounded wait is intended.
TimeoutStartSec=infinity

# --- Restart policy -----------------------------------------------------------
# Restart three seconds after an unclean exit; a clean exit is left alone.
# NOTE(review): no StartLimit* settings are visible in this unit, so a
# persistently failing instance may keep restarting -- confirm that is wanted.
Restart=on-failure
RestartSec=3

# --- Sandboxing ---------------------------------------------------------------
NoNewPrivileges=yes
PrivateTmp=yes
ProtectSystem=full
ProtectHome=yes

[Install]
# Enabling an instance of this unit hooks it into multi-user.target.
WantedBy=multi-user.target
|