---
# Worker-node configuration for the dispatcher/llama-server setup.

node_id: "node-1"

# Set to a LAN/private IP (or 0.0.0.0) if dispatcher is on another machine
listen_host: "127.0.0.1"
listen_port: 8091

# Set to the dispatcher gateway URL if you want auto-registration/heartbeat.
dispatcher_base_url: "http://127.0.0.1:8080"
# Optional auth key presented to dispatcher for /v1/nodes/* endpoints
dispatcher_node_key: "change-me-node-key-1"
dispatcher_roles: ["planner", "coder"]
heartbeat_interval_sec: 5

# llama-server process management.
llama_server_bin: "llama-server"
llama_server_startup_timeout_s: 30
llama_server_probe_interval_s: 0.5

# Directories scanned for GGUF model files.
model_roots:
  - "/models"

models:
  - model_id: "planner-gguf"
    # path is the exact GGUF file that this model_id will load when requested
    path: "/models/SomePlannerModel.Q5_K_M.gguf"
    roles: ["planner"]
    default_ctx: 8192
    # Common llama-server options can be configured as structured fields:
    ctx_size: 8192
    gpu_layers: 60
    threads: 8
    batch_size: 1024
    flash_attn: true
    # Keep server_args for less common passthrough flags.
    server_args:
      parallel: 1