# Identity and listen address of this node.
# NOTE(review): key meanings in this stanza are inferred from their names;
# confirm against the consuming application's schema.
node:
  # Identifier for this host. The service_id values under `services` all
  # start with this same string.
  host_id: "atlas-01"
  # Presumably a human-readable label shown in UIs/logs -- TODO confirm.
  display_name: "Atlas GPU Box"
  # 127.0.0.1 is the loopback address. If this is used as the bind address,
  # the node is only reachable from the local machine -- verify before
  # deploying across hosts.
  listen_host: "127.0.0.1"
  listen_port: 8891

# Connection settings for the control plane this node talks to.
control_plane:
  # Plain-HTTP URL on loopback (port 8800).
  # NOTE(review): if the control plane runs on another host, consider an
  # https:// URL so the API key below is not sent in clear text.
  base_url: "http://127.0.0.1:8800"
  # Placeholder value -- replace with a real key before use, and avoid
  # committing the real secret to version control.
  node_api_key: "change-me-node-key"
  # Presumably seconds, judging by the `_s` suffix -- TODO confirm.
  heartbeat_interval_s: 5

# Hardware and model inventory for this node.
# NOTE(review): it is not visible from this file whether these values are
# taken as declared or cross-checked against the actual hardware.
inventory:
  # List of directories; "/path/to/models" is a placeholder to replace with
  # a real path.
  model_roots:
    - "/path/to/models"
  cpu_threads: 24
  # Presumably gigabytes, judging by the `_gb` suffix -- TODO confirm.
  ram_gb: 128
  # Accelerator backend flags; only CUDA is enabled in this configuration.
  capabilities:
    cuda: true
    rocm: false
    metal: false

# Settings for managed runtimes.
# NOTE(review): presumably applies to services declared with
# `launcher: "managed"` -- confirm against the consumer.
managed_runtimes:
  enabled: true
  # Placeholder path -- replace with the real location of the llama-server
  # executable.
  llama_server_bin: "/path/to/llama-server"

# Services on this node, one list entry per service.
# NOTE(review): whether the `state` / `observed` values are initial seeds or
# are overwritten at runtime cannot be determined from this file.
services:
  # Chat service. The service_id has the form <host_id>/<kind>/<name>.
  - service_id: "atlas-01/chat/qwen3-8b"
    kind: "chat"
    # Loopback URL for this service (port 18091).
    endpoint: "http://127.0.0.1:18091"
    runtime:
      engine: "llama.cpp"
      launcher: "managed"
    # Model assets attached to this service.
    assets:
      - asset_id: "qwen3-8b-q4km"
        loaded: true
    state:
      health: "healthy"
      load_state: "loaded"
      accept_requests: true
    # Performance figures; units follow the key suffixes (milliseconds and
    # tokens per second). p50 presumably means median -- TODO confirm.
    observed:
      p50_latency_ms: 900
      tokens_per_sec: 40

  # Embeddings service. Same engine and launcher as the chat entry, on its
  # own loopback port (18092).
  - service_id: "atlas-01/embeddings/bge-small"
    kind: "embeddings"
    endpoint: "http://127.0.0.1:18092"
    runtime:
      engine: "llama.cpp"
      launcher: "managed"
    # Model assets attached to this service.
    assets:
      - asset_id: "bge-small-en"
        loaded: true
    state:
      health: "healthy"
      load_state: "loaded"
      accept_requests: true