---
# Node configuration: identity, dispatcher registration, and llama-server model serving.

node_id: "node-1"
listen_host: "127.0.0.1"  # Set to a LAN/private IP (or 0.0.0.0) if dispatcher is on another machine
listen_port: 8091

# Set to the dispatcher gateway URL if you want auto-registration/heartbeat.
dispatcher_base_url: "http://127.0.0.1:8080"
# Optional auth key presented to dispatcher for /v1/nodes/* endpoints
dispatcher_node_key: "change-me-node-key-1"
dispatcher_roles: ["planner", "coder"]
heartbeat_interval_sec: 5

# llama-server launch settings: binary name/path and startup probing.
llama_server_bin: "llama-server"
llama_server_startup_timeout_s: 30
llama_server_probe_interval_s: 0.5

# Directories scanned for GGUF model files.
model_roots:
  - "/models"

models:
  - model_id: "planner-gguf"
    # path is the exact GGUF file that this model_id will load when requested
    path: "/models/SomePlannerModel.Q5_K_M.gguf"
    roles: ["planner"]
    default_ctx: 8192
    # Common llama-server options can be configured as structured fields:
    ctx_size: 8192
    gpu_layers: 60
    threads: 8
    batch_size: 1024
    flash_attn: true
    # Keep server_args for less common passthrough flags.
    server_args:
      parallel: 1