369 lines
14 KiB
Python
369 lines
14 KiB
Python
from pathlib import Path
|
|
|
|
from geniehive_control.main import create_app
|
|
from geniehive_control.models import BenchmarkSample, HostHeartbeat, HostRegistration, RegisteredService, RoleProfile, RouteMatchRequest
|
|
from geniehive_control.registry import Registry
|
|
|
|
|
|
def test_registry_persists_registration_and_heartbeat(tmp_path: Path) -> None:
|
|
db_path = tmp_path / "geniehive.sqlite3"
|
|
registry = Registry(db_path)
|
|
|
|
host = registry.register_host(
|
|
HostRegistration(
|
|
host_id="atlas-01",
|
|
display_name="Atlas GPU Box",
|
|
address="192.168.1.101",
|
|
labels={"site": "home-lab"},
|
|
capabilities={"cuda": True},
|
|
resources={"cpu_threads": 24},
|
|
services=[
|
|
RegisteredService(
|
|
service_id="atlas-01/chat/qwen3-8b",
|
|
host_id="atlas-01",
|
|
kind="chat",
|
|
protocol="openai",
|
|
endpoint="http://192.168.1.101:18091",
|
|
runtime={"engine": "llama.cpp", "launcher": "managed"},
|
|
assets=[{"asset_id": "qwen3-8b-q4km", "loaded": True}],
|
|
state={"health": "healthy", "load_state": "loaded", "accept_requests": True},
|
|
observed={"p50_latency_ms": 900, "tokens_per_sec": 40},
|
|
)
|
|
],
|
|
)
|
|
)
|
|
assert host is not None
|
|
assert host["host_id"] == "atlas-01"
|
|
|
|
updated = registry.heartbeat_host(
|
|
HostHeartbeat(
|
|
host_id="atlas-01",
|
|
status={"state": "online"},
|
|
metrics={"gpu_utilization_pct": 77},
|
|
)
|
|
)
|
|
assert updated is not None
|
|
assert updated["metrics"]["gpu_utilization_pct"] == 77
|
|
|
|
hosts = registry.list_hosts()
|
|
services = registry.list_services()
|
|
health = registry.cluster_health(stale_after_s=30)
|
|
|
|
assert len(hosts) == 1
|
|
assert len(services) == 1
|
|
assert services[0]["service_id"] == "atlas-01/chat/qwen3-8b"
|
|
assert services[0]["state"]["health"] == "healthy"
|
|
assert health["host_count"] == 1
|
|
assert health["healthy_service_count"] == 1
|
|
|
|
|
|
def test_registry_persists_roles_and_resolves_direct_and_role_routes(tmp_path: Path) -> None:
|
|
db_path = tmp_path / "geniehive.sqlite3"
|
|
registry = Registry(db_path)
|
|
|
|
registry.register_host(
|
|
HostRegistration(
|
|
host_id="atlas-01",
|
|
address="192.168.1.101",
|
|
services=[
|
|
RegisteredService(
|
|
service_id="atlas-01/chat/qwen3-8b",
|
|
host_id="atlas-01",
|
|
kind="chat",
|
|
endpoint="http://192.168.1.101:18091",
|
|
assets=[{"asset_id": "qwen3-8b-q4km", "loaded": True}],
|
|
state={"health": "healthy", "load_state": "loaded", "accept_requests": True},
|
|
observed={"p50_latency_ms": 900},
|
|
),
|
|
RegisteredService(
|
|
service_id="atlas-01/embeddings/bge-small",
|
|
host_id="atlas-01",
|
|
kind="embeddings",
|
|
endpoint="http://192.168.1.101:18092",
|
|
assets=[{"asset_id": "bge-small-en", "loaded": True}],
|
|
state={"health": "healthy", "load_state": "loaded", "accept_requests": True},
|
|
observed={"p50_latency_ms": 120},
|
|
),
|
|
],
|
|
)
|
|
)
|
|
registry.upsert_roles(
|
|
[
|
|
RoleProfile(
|
|
role_id="mentor",
|
|
display_name="Mentor",
|
|
operation="chat",
|
|
modality="text",
|
|
routing_policy={"preferred_families": ["qwen3"]},
|
|
),
|
|
RoleProfile(
|
|
role_id="embedder",
|
|
display_name="Embedder",
|
|
operation="embeddings",
|
|
modality="text",
|
|
routing_policy={"require_loaded": True},
|
|
),
|
|
]
|
|
)
|
|
|
|
roles = registry.list_roles()
|
|
assert len(roles) == 2
|
|
assert roles[0]["role_id"] == "embedder"
|
|
|
|
direct = registry.resolve_route("qwen3-8b-q4km")
|
|
assert direct is not None
|
|
assert direct["match_type"] == "direct"
|
|
assert direct["service"]["service_id"] == "atlas-01/chat/qwen3-8b"
|
|
|
|
by_role = registry.resolve_route("mentor")
|
|
assert by_role is not None
|
|
assert by_role["match_type"] == "role"
|
|
assert by_role["role"]["role_id"] == "mentor"
|
|
assert by_role["service"]["service_id"] == "atlas-01/chat/qwen3-8b"
|
|
|
|
embed_role = registry.resolve_route("embedder")
|
|
assert embed_role is not None
|
|
assert embed_role["service"]["service_id"] == "atlas-01/embeddings/bge-small"
|
|
|
|
models = registry.list_client_models()
|
|
ids = {item["id"] for item in models}
|
|
assert "atlas-01/chat/qwen3-8b" in ids
|
|
assert "qwen3-8b-q4km" in ids
|
|
assert "mentor" in ids
|
|
mentor = next(item for item in models if item["id"] == "mentor")
|
|
assert mentor["geniehive"]["route_type"] == "role"
|
|
assert mentor["geniehive"]["offload_hint"]["suitability"] == "good_for_low_complexity"
|
|
assert mentor["geniehive"]["effective_request_policy"]["body_defaults"]["chat_template_kwargs"]["enable_thinking"] is False
|
|
asset = next(item for item in models if item["id"] == "qwen3-8b-q4km")
|
|
assert asset["geniehive"]["route_type"] == "asset"
|
|
assert asset["geniehive"]["offload_hint"]["recommended_for"] == "lower-complexity offload"
|
|
assert asset["geniehive"]["effective_request_policy"]["body_defaults"]["chat_template_kwargs"]["enable_thinking"] is False
|
|
|
|
|
|
def test_control_app_exposes_expected_routes() -> None:
|
|
app = create_app()
|
|
paths = {route.path for route in app.routes}
|
|
assert "/health" in paths
|
|
assert "/v1/models" in paths
|
|
assert "/v1/nodes/register" in paths
|
|
assert "/v1/nodes/heartbeat" in paths
|
|
assert "/v1/cluster/hosts" in paths
|
|
assert "/v1/cluster/services" in paths
|
|
assert "/v1/cluster/benchmarks" in paths
|
|
assert "/v1/cluster/roles" in paths
|
|
assert "/v1/cluster/health" in paths
|
|
assert "/v1/cluster/routes/resolve" in paths
|
|
assert "/v1/cluster/routes/match" in paths
|
|
|
|
|
|
def test_registry_can_rank_routes_for_task_statements(tmp_path: Path) -> None:
|
|
db_path = tmp_path / "geniehive.sqlite3"
|
|
registry = Registry(db_path)
|
|
|
|
registry.register_host(
|
|
HostRegistration(
|
|
host_id="atlas-01",
|
|
address="192.168.1.101",
|
|
services=[
|
|
RegisteredService(
|
|
service_id="atlas-01/chat/qwen-reasoner",
|
|
host_id="atlas-01",
|
|
kind="chat",
|
|
endpoint="http://192.168.1.101:18091",
|
|
assets=[{"asset_id": "qwen3.5-9b", "loaded": True}],
|
|
state={"health": "healthy", "load_state": "loaded", "accept_requests": True},
|
|
observed={"p50_latency_ms": 1100, "tokens_per_sec": 28},
|
|
),
|
|
RegisteredService(
|
|
service_id="atlas-01/chat/rocket-background",
|
|
host_id="atlas-01",
|
|
kind="chat",
|
|
endpoint="http://192.168.1.101:18093",
|
|
assets=[{"asset_id": "rocket-3b", "loaded": True}],
|
|
state={"health": "healthy", "load_state": "loaded", "accept_requests": True},
|
|
observed={"p50_latency_ms": 4200, "tokens_per_sec": 7},
|
|
),
|
|
],
|
|
)
|
|
)
|
|
registry.upsert_roles(
|
|
[
|
|
RoleProfile(
|
|
role_id="general_assistant",
|
|
display_name="General Assistant",
|
|
description="Fast general technical assistant for reasoning and question answering.",
|
|
operation="chat",
|
|
modality="text",
|
|
routing_policy={"preferred_families": ["qwen3.5", "qwen"]},
|
|
),
|
|
RoleProfile(
|
|
role_id="background_summarizer",
|
|
display_name="Background Summarizer",
|
|
description="Slow fallback summarizer for lower-priority background work.",
|
|
operation="chat",
|
|
modality="text",
|
|
routing_policy={"preferred_families": ["rocket"]},
|
|
),
|
|
]
|
|
)
|
|
|
|
result = registry.match_routes(
|
|
RouteMatchRequest(
|
|
task="fast technical reasoning for an interactive assistant",
|
|
kind="chat",
|
|
modality="text",
|
|
limit=4,
|
|
)
|
|
)
|
|
|
|
assert result["task_count"] == 1
|
|
assert result["candidates"]
|
|
top = result["candidates"][0]
|
|
assert top["candidate_type"] == "role"
|
|
assert top["candidate_id"] == "general_assistant"
|
|
assert top["service"]["service_id"] == "atlas-01/chat/qwen-reasoner"
|
|
assert top["signals"]["preferred_family_match"] == 1.0
|
|
|
|
|
|
def test_registry_match_can_include_direct_services(tmp_path: Path) -> None:
|
|
db_path = tmp_path / "geniehive.sqlite3"
|
|
registry = Registry(db_path)
|
|
registry.register_host(
|
|
HostRegistration(
|
|
host_id="atlas-01",
|
|
address="192.168.1.101",
|
|
services=[
|
|
RegisteredService(
|
|
service_id="atlas-01/chat/qwen3-8b",
|
|
host_id="atlas-01",
|
|
kind="chat",
|
|
endpoint="http://192.168.1.101:18091",
|
|
assets=[{"asset_id": "qwen3-8b-q4km", "loaded": True}],
|
|
state={"health": "healthy", "load_state": "loaded", "accept_requests": True},
|
|
observed={"p50_latency_ms": 900, "tokens_per_sec": 40},
|
|
)
|
|
],
|
|
)
|
|
)
|
|
|
|
result = registry.match_routes(
|
|
RouteMatchRequest(
|
|
task="qwen model for quick chat",
|
|
kind="chat",
|
|
include_direct_services=True,
|
|
limit=4,
|
|
)
|
|
)
|
|
|
|
direct = next(candidate for candidate in result["candidates"] if candidate["candidate_type"] == "service")
|
|
assert direct["candidate_id"] == "atlas-01/chat/qwen3-8b"
|
|
assert direct["service"]["assets"][0]["asset_id"] == "qwen3-8b-q4km"
|
|
|
|
|
|
def test_registry_persists_benchmark_samples_and_uses_them_for_matching(tmp_path: Path) -> None:
|
|
db_path = tmp_path / "geniehive.sqlite3"
|
|
registry = Registry(db_path)
|
|
registry.register_host(
|
|
HostRegistration(
|
|
host_id="atlas-01",
|
|
address="192.168.1.101",
|
|
services=[
|
|
RegisteredService(
|
|
service_id="atlas-01/chat/qwen-fast",
|
|
host_id="atlas-01",
|
|
kind="chat",
|
|
endpoint="http://192.168.1.101:18091",
|
|
assets=[{"asset_id": "qwen3.5-9b", "loaded": True}],
|
|
state={"health": "healthy", "load_state": "loaded", "accept_requests": True},
|
|
observed={"p50_latency_ms": 1500, "tokens_per_sec": 22},
|
|
),
|
|
RegisteredService(
|
|
service_id="atlas-01/chat/rocket-slow",
|
|
host_id="atlas-01",
|
|
kind="chat",
|
|
endpoint="http://192.168.1.101:18093",
|
|
assets=[{"asset_id": "rocket-3b", "loaded": True}],
|
|
state={"health": "healthy", "load_state": "loaded", "accept_requests": True},
|
|
observed={"p50_latency_ms": 1200, "tokens_per_sec": 10},
|
|
),
|
|
],
|
|
)
|
|
)
|
|
registry.upsert_benchmark_samples(
|
|
[
|
|
BenchmarkSample(
|
|
benchmark_id="bench-qwen-1",
|
|
service_id="atlas-01/chat/qwen-fast",
|
|
asset_id="qwen3.5-9b",
|
|
workload="chat.short_reasoning",
|
|
observed_at=1000.0,
|
|
results={"tokens_per_sec": 30, "ttft_ms": 900, "quality_score": 0.9},
|
|
),
|
|
BenchmarkSample(
|
|
benchmark_id="bench-rocket-1",
|
|
service_id="atlas-01/chat/rocket-slow",
|
|
asset_id="rocket-3b",
|
|
workload="chat.short_reasoning",
|
|
observed_at=1000.0,
|
|
results={"tokens_per_sec": 9, "ttft_ms": 1900, "quality_score": 0.4},
|
|
),
|
|
]
|
|
)
|
|
|
|
samples = registry.list_benchmark_samples(service_id="atlas-01/chat/qwen-fast")
|
|
assert len(samples) == 1
|
|
assert samples[0]["benchmark_id"] == "bench-qwen-1"
|
|
|
|
result = registry.match_routes(
|
|
RouteMatchRequest(
|
|
task="fast short reasoning for chat responses",
|
|
workloads=["chat.short_reasoning"],
|
|
kind="chat",
|
|
include_direct_services=True,
|
|
limit=4,
|
|
)
|
|
)
|
|
|
|
top_service = next(candidate for candidate in result["candidates"] if candidate["candidate_type"] == "service")
|
|
assert top_service["candidate_id"] == "atlas-01/chat/qwen-fast"
|
|
assert top_service["signals"]["benchmark_match_count"] == 1
|
|
assert top_service["signals"]["best_workload_overlap"] == 1.0
|
|
|
|
|
|
def test_registry_exposes_asset_request_policy_in_model_metadata(tmp_path: Path) -> None:
|
|
db_path = tmp_path / "geniehive.sqlite3"
|
|
registry = Registry(db_path)
|
|
registry.register_host(
|
|
HostRegistration(
|
|
host_id="atlas-01",
|
|
address="192.168.1.101",
|
|
services=[
|
|
RegisteredService(
|
|
service_id="atlas-01/chat/custom-model",
|
|
host_id="atlas-01",
|
|
kind="chat",
|
|
endpoint="http://192.168.1.101:18091",
|
|
assets=[
|
|
{
|
|
"asset_id": "custom-model-v1",
|
|
"loaded": True,
|
|
"request_policy": {
|
|
"body_defaults": {
|
|
"temperature": 0.2,
|
|
"chat_template_kwargs": {"custom_flag": "yes"},
|
|
}
|
|
},
|
|
}
|
|
],
|
|
state={"health": "healthy", "load_state": "loaded", "accept_requests": True},
|
|
observed={"p50_latency_ms": 900},
|
|
)
|
|
],
|
|
)
|
|
)
|
|
|
|
models = registry.list_client_models()
|
|
asset = next(item for item in models if item["id"] == "custom-model-v1")
|
|
assert asset["geniehive"]["effective_request_policy"]["body_defaults"]["temperature"] == 0.2
|
|
assert asset["geniehive"]["effective_request_policy"]["body_defaults"]["chat_template_kwargs"]["custom_flag"] == "yes"
|