# GenieHive/configs/node.singlebox.ollama.example.yaml
# Example node configuration for a single-box deployment backed by an
# externally-managed Ollama server. Copy, rename, and edit before use.
---
# Identity and network settings for this node agent.
node:
  host_id: "singlebox-ollama"      # unique node identifier within the hive
  display_name: "SingleBox Ollama"
  listen_host: "127.0.0.1"         # bind address for the node's own API
  listen_port: 8891
  # NOTE(review): assumed to be the address advertised to the control plane
  # (distinct from listen_host) — confirm against the node agent's schema.
  address: "127.0.0.1"
  labels:                          # free-form labels, presumably used for selection/scheduling
    topology: "singlebox"
    runtime: "ollama"
# How this node reaches and authenticates with the control plane.
control_plane:
  base_url: "http://127.0.0.1:8800"
  # Placeholder credential — replace before deployment; never commit a real key.
  node_api_key: "change-me-node-key"
  heartbeat_interval_s: 5            # seconds between heartbeats
# Static hardware/resource description reported by this node.
inventory:
  model_roots: []    # no local model directories; models are served by Ollama itself
  cpu_threads: 24
  ram_gb: 64
  capabilities:
    cpu: true
    cuda: false      # CPU-only box
# Disabled: runtime processes are launched externally
# (see services[].runtime.launcher: "external").
managed_runtimes:
  enabled: false
# Services exposed by this node. Both entries point at the same local Ollama
# endpoint; they differ only in kind (chat vs embeddings) and asset.
# NOTE(review): indentation was reconstructed from a whitespace-mangled source.
# The nesting of state/observed at service level (rather than per-asset) is
# inferred from the parallel structure of both entries — confirm against the
# consumer's schema.
services:
  - service_id: "singlebox/chat/qwen3"
    kind: "chat"
    endpoint: "http://127.0.0.1:11434"   # local Ollama server
    runtime:
      engine: "ollama"
      launcher: "external"               # process lifecycle not managed by this node
    # discover_protocol: "ollama" queries GET /api/tags and GET /api/ps on each heartbeat.
    # Assets listed here serve as a static baseline; discovered models are merged in and
    # their loaded state is corrected from /api/ps (VRAM-resident = loaded: true).
    # loaded_model_count and vram_used_bytes are populated in observed from /api/ps.
    discover_protocol: "ollama"
    assets:
      - asset_id: "qwen3"
        loaded: true
    state:
      health: "healthy"
      load_state: "loaded"
      accept_requests: true
    observed:
      p50_latency_ms: 900    # baseline estimate; refined at runtime via discovery
  - service_id: "singlebox/embeddings/nomic-embed-text"
    kind: "embeddings"
    endpoint: "http://127.0.0.1:11434"   # same Ollama server as the chat service
    runtime:
      engine: "ollama"
      launcher: "external"
    discover_protocol: "ollama"
    assets:
      - asset_id: "nomic-embed-text"
        loaded: true
    state:
      health: "healthy"
      load_state: "loaded"
      accept_requests: true
    observed:
      p50_latency_ms: 150