#!/usr/bin/env bash
#
# Example launcher pattern for:
# - GPU0 chat model on :18091
# - GPU1 chat model on :18092
# - CPU fallback chat model on :18093
#
# This script does not start any server itself. It only prints, to stdout,
# the helper-script invocations and the equivalent raw llama-server command
# lines so they can be run in separate shells or tmux panes.
#
# Defaults are based on models already present under /home/netuser/bin/models/llm.
# Override them via env vars if you want different weights:
#   MODEL_GPU0        model file used in the first CUDA_VISIBLE_DEVICES=0 command
#   MODEL_GPU1        model file used in the CUDA_VISIBLE_DEVICES=1 command
#   MODEL_CPU         model file used in the command without CUDA_VISIBLE_DEVICES
#   LLAMA_SERVER_BIN  path of the llama-server binary used in all raw commands

set -euo pipefail

MODEL_GPU0="${MODEL_GPU0:-/home/netuser/bin/models/llm/Qwen2.5-14B-Instruct-1M-Q5_K_M.gguf}"
MODEL_GPU1="${MODEL_GPU1:-/home/netuser/bin/models/llm/Qwen3.5-9B-Q5_K_M.gguf}"
MODEL_CPU="${MODEL_CPU:-/home/netuser/bin/models/llm/rocket-3b.Q5_K_M.gguf}"
LLAMA_SERVER_BIN="${LLAMA_SERVER_BIN:-/home/netuser/bin/llama.cpp/build/bin/llama-server}"

#######################################
# Print the launch instructions: helper scripts first, then the raw
# llama-server commands with the configured binary and model paths filled in.
# Globals:   LLAMA_SERVER_BIN, MODEL_GPU0, MODEL_GPU1, MODEL_CPU (read)
# Arguments: none
# Outputs:   the instruction text on stdout
# Returns:   exit status of cat
#######################################
print_launch_instructions() {
  # Unquoted delimiter: the variables are expanded, while the double quotes
  # around them are emitted literally so the printed commands stay quoted.
  cat <<EOF
Start these in separate shells or tmux panes.
Helper scripts are available too:

bash /home/netuser/bin/geniehive/scripts/p40_triple_gpu0.sh

bash /home/netuser/bin/geniehive/scripts/p40_triple_gpu1.sh

bash /home/netuser/bin/geniehive/scripts/p40_triple_cpu.sh

Or try the combined launcher:
bash /home/netuser/bin/geniehive/scripts/launch_p40_triple.sh

Equivalent raw commands:

CUDA_VISIBLE_DEVICES=0 "$LLAMA_SERVER_BIN" -m "$MODEL_GPU0" --host 127.0.0.1 --port 18091

CUDA_VISIBLE_DEVICES=1 "$LLAMA_SERVER_BIN" -m "$MODEL_GPU1" --host 127.0.0.1 --port 18092

"$LLAMA_SERVER_BIN" -m "$MODEL_CPU" --host 127.0.0.1 --port 18093 -ngl 0 -t 12
EOF
}

print_launch_instructions