---
# Node configuration: identity of this host, how it reaches the control
# plane, what hardware/model inventory it has, and which services it
# exposes.
#
# NOTE(review): this file was recovered from a copy whose line breaks and
# indentation had been collapsed. The nesting below is reconstructed from
# the key names; confirm it against the consumer's schema, in particular
# that `control_plane`, `inventory`, `managed_runtimes` and `services` are
# top-level keys and that `capabilities` belongs under `inventory`.

node:
  host_id: "atlas-01"
  display_name: "Atlas GPU Box"
  listen_host: "127.0.0.1"
  listen_port: 8891

control_plane:
  base_url: "http://127.0.0.1:8800"
  # Placeholder value: replace before use, and avoid committing the real
  # key to version control.
  node_api_key: "change-me-node-key"
  heartbeat_interval_s: 5

inventory:
  model_roots:
    # Placeholder path: point at the real model directory.
    - "/path/to/models"
  cpu_threads: 24
  ram_gb: 128
  capabilities:
    cuda: true
    rocm: false
    metal: false

managed_runtimes:
  enabled: true
  # Placeholder path: point at the real llama-server binary.
  llama_server_bin: "/path/to/llama-server"

services:
  - service_id: "atlas-01/chat/qwen3-8b"
    kind: "chat"
    endpoint: "http://127.0.0.1:18091"
    runtime:
      engine: "llama.cpp"
      launcher: "managed"
    assets:
      - asset_id: "qwen3-8b-q4km"
        loaded: true
    state:
      health: "healthy"
      load_state: "loaded"
      accept_requests: true
    observed:
      p50_latency_ms: 900
      tokens_per_sec: 40

  - service_id: "atlas-01/embeddings/bge-small"
    kind: "embeddings"
    endpoint: "http://127.0.0.1:18092"
    runtime:
      engine: "llama.cpp"
      launcher: "managed"
    assets:
      - asset_id: "bge-small-en"
        loaded: true
    state:
      health: "healthy"
      load_state: "loaded"
      accept_requests: true