#!/usr/bin/env bash
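# GPU smoke-test probe: run the llama.cpp CUDA server image in Docker, load the
# model onto a single GPU, and confirm that llama-server starts serving.
# A timeout exit after a successful startup counts as a pass.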
set -euo pipefail
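# Defaults; each value can be overridden via an environment variable of the same name.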
IMAGE="${IMAGE:-ghcr.io/ggml-org/llama.cpp:server-cuda}"
MODEL_PATH="${MODEL_PATH:-/home/netuser/bin/models/llm/Qwen3.5-9B-Q5_K_M.gguf}"
GPU_INDEX="${GPU_INDEX:-0}"
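# A small context window is enough; the probe only checks that the server starts serving.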
CTX_SIZE="${CTX_SIZE:-512}"
PORT="${PORT:-19091}"
TIMEOUT_SECONDS="${TIMEOUT_SECONDS:-90}"

if [[ ! -f "${MODEL_PATH}" ]]; then
  echo "Model not found: ${MODEL_PATH}" >&2
  exit 1
fi

echo "Image: ${IMAGE}"
echo "Model: ${MODEL_PATH}"
echo "GPU: ${GPU_INDEX}"
echo "Port: ${PORT}"
echo "Timeout: ${TIMEOUT_SECONDS}s"
echo
echo "This probe is successful if llama-server loads the model and begins serving."
echo "A timeout exit after successful startup is acceptable for this test."
echo

# Run llama-server under `timeout`. `--gpus all` requires the NVIDIA Container
# Toolkit; CUDA_VISIBLE_DEVICES limits the container to the selected GPU, and
# `-ngl 999` asks llama-server to offload all model layers to it. No port is
# published to the host, since the probe only needs the server to start; GNU
# timeout exits with status 124 when the limit is reached, which is acceptable
# after a successful startup.
timeout "${TIMEOUT_SECONDS}"s docker run --rm --gpus all \
  -e CUDA_VISIBLE_DEVICES="${GPU_INDEX}" \
  -v "$(dirname "${MODEL_PATH}"):/models:ro" \
  "${IMAGE}" \
  -m "/models/$(basename "${MODEL_PATH}")" \
  -ngl 999 \
  --ctx-size "${CTX_SIZE}" \
  --host 127.0.0.1 \
  --port "${PORT}"