# GenieHive/configs/node.example.yaml
#
# File-viewer metadata at capture time: 57 lines, 1.1 KiB, YAML.

# Identity and listen address for this node.
# The service_id values in the `services` list of this example are
# prefixed with the same host_id.
node:
  host_id: "atlas-01"
  display_name: "Atlas GPU Box"
  # Loopback address in this example.
  listen_host: "127.0.0.1"
  listen_port: 8891
# Connection settings for the control plane.
control_plane:
  base_url: "http://127.0.0.1:8800"
  # Placeholder value: replace before use and keep real keys out of
  # version control.
  node_api_key: "change-me-node-key"
  # Presumably seconds, going by the `_s` suffix; confirm against the
  # consumer of this file.
  heartbeat_interval_s: 5
# Inventory section: model directories, CPU/RAM figures and accelerator
# capability flags.
# NOTE(review): cpu_threads, ram_gb and capabilities are assumed to be
# children of `inventory`; verify against the consumer's schema.
inventory:
  # Placeholder path: point at the real model directory (or directories).
  model_roots:
    - "/path/to/models"
  cpu_threads: 24
  ram_gb: 128
  # One boolean flag per accelerator backend.
  capabilities:
    cuda: true
    rocm: false
    metal: false
# Managed runtime settings.
managed_runtimes:
  enabled: true
  # Placeholder path: replace with the location of the llama-server binary.
  llama_server_bin: "/path/to/llama-server"
# Services on this node, one list entry per service.
# Each entry carries its own kind, endpoint, runtime, assets and state, so
# the entries must stay separate mappings; flattening them produces
# duplicate keys.
# NOTE(review): `loaded` is assumed to belong to the asset item it follows
# rather than to the service; verify against the consumer's schema.
services:
  # Chat service.
  - service_id: "atlas-01/chat/qwen3-8b"
    kind: "chat"
    endpoint: "http://127.0.0.1:18091"
    runtime:
      engine: "llama.cpp"
      launcher: "managed"
    assets:
      - asset_id: "qwen3-8b-q4km"
        loaded: true
    state:
      health: "healthy"
      load_state: "loaded"
      accept_requests: true
    # Only this entry carries an `observed` block in the example.
    observed:
      p50_latency_ms: 900
      tokens_per_sec: 40
  # Embeddings service.
  - service_id: "atlas-01/embeddings/bge-small"
    kind: "embeddings"
    endpoint: "http://127.0.0.1:18092"
    runtime:
      engine: "llama.cpp"
      launcher: "managed"
    assets:
      - asset_id: "bge-small-en"
        loaded: true
    state:
      health: "healthy"
      load_state: "loaded"
      accept_requests: true