2.4 KiB
2.4 KiB
GenieHive Schemas
These are canonical logical schemas for v1. They are documentation first, not final implementation code.
Host
# Example `host` record. Nesting is expressed with 2-space indentation so that
# every field below is a child of `host` rather than a top-level sibling.
host:
  host_id: "atlas-01"
  display_name: "Atlas GPU Box"
  address: "192.168.1.101"
  labels:
    site: "home-lab"
    class: "gpu"
  capabilities:
    cuda: true
    rocm: false
    metal: false
  resources:
    cpu_threads: 24
    ram_gb: 128
    # NOTE(review): `gpus` is placed under `resources` based on key order;
    # confirm against the canonical schema.
    gpus:
      - gpu_id: "cuda:0"
        name: "RTX 4090"
        vram_gb: 24
  auth:
    node_key_id: "nk_atlas_01"
  status:
    state: "online"
    # Quoted so the timestamp stays a string instead of being parsed as a date.
    last_seen: "2026-04-05T15:30:00Z"
Service
# Example `service` record. `host_id` here carries the same value as the
# example host's `host_id` ("atlas-01").
service:
  service_id: "atlas-01/chat/qwen3-8b"
  host_id: "atlas-01"
  kind: "chat"
  protocol: "openai"
  endpoint: "http://192.168.1.101:18091"
  runtime:
    engine: "llama.cpp"
    launcher: "managed"
  # Sequence of asset entries; each entry pairs an `asset_id` with a `loaded` flag.
  assets:
    - asset_id: "qwen3-8b-q4km"
      loaded: true
  state:
    health: "healthy"
    load_state: "loaded"
    accept_requests: true
  observed:
    p50_latency_ms: 920
    p95_latency_ms: 1900
    tokens_per_sec: 42
Asset
# Example `asset` record. `locator` and `metadata` are nested mappings; the
# `kind`/`value` pair belongs to `locator`.
asset:
  asset_id: "qwen3-8b-q4km"
  family: "Qwen3-8B"
  modality: "text"
  operation: "chat"
  format: "gguf"
  locator:
    kind: "path"
    value: "/models/qwen3-8b/qwen3-8b-q4_k_m.gguf"
  metadata:
    quant: "Q4_K_M"
    ctx_train: 32768
Role Profile
# Example `role` (role profile) record.
role:
  role_id: "mentor"
  display_name: "Mentor"
  description: "Guidance-oriented instructional reasoning"
  modality: "text"
  operation: "chat"
  prompt_policy:
    system_prompt: "You guide without doing the user's work for them."
    # Quoted because a plain scalar starting with `{` would be read as a flow mapping.
    user_template: "{{ user_input }}"
  routing_policy:
    preferred_families: ["Qwen3", "Mistral"]
    preferred_labels: ["instruction", "stable"]
    min_context: 8192
    require_loaded: false
    # NOTE(review): `fallback_roles` is assumed to belong to `routing_policy`;
    # it may instead be a direct child of `role` — confirm against the canonical schema.
    fallback_roles: ["general_assistant"]
Health Sample
# Example `health_sample` record. `target_id` here carries the same value as
# the example service's `service_id`.
health_sample:
  sample_id: "hs_01"
  target_type: "service"
  target_id: "atlas-01/chat/qwen3-8b"
  # Quoted so the timestamp stays a string instead of being parsed as a date.
  observed_at: "2026-04-05T15:30:00Z"
  status: "healthy"
  checks:
    http_ok: true
    models_ok: true
    auth_ok: true
  metrics:
    queue_depth: 1
    in_flight: 1
    mem_used_gb: 18.4
Benchmark Sample
# Example `benchmark_sample` record. `service_id` and `asset_id` carry the same
# values as the example service and asset records.
benchmark_sample:
  benchmark_id: "bench_01"
  service_id: "atlas-01/chat/qwen3-8b"
  asset_id: "qwen3-8b-q4km"
  # Quoted so the timestamp stays a string instead of being parsed as a date.
  observed_at: "2026-04-05T15:25:00Z"
  workload: "chat.short_reasoning"
  results:
    prompt_tokens: 512
    completion_tokens: 256
    ttft_ms: 780
    tokens_per_sec: 44