RoleMesh-Gateway/tests/test_node_agent.py

155 lines
4.9 KiB
Python

from __future__ import annotations
import asyncio
from pathlib import Path
import httpx
from rolemesh_node_agent.adapters.base import DeviceMetrics, DeviceRef
from rolemesh_node_agent.config import ModelEntry, NodeAgentConfig
def _node_config(tmp_path: Path) -> NodeAgentConfig:
    """Build a single-model node configuration rooted at ``tmp_path``.

    A stub ``model.gguf`` file containing the bytes ``GGUF`` is written into
    ``tmp_path`` and registered as model ``planner-gguf`` with the
    ``planner`` role. ``tmp_path`` itself is the only model root.
    """
    gguf_file = tmp_path / "model.gguf"
    gguf_file.write_bytes(b"GGUF")

    planner = ModelEntry(model_id="planner-gguf", path=gguf_file, roles=["planner"])
    return NodeAgentConfig(node_id="node-1", model_roots=[tmp_path], models=[planner])
async def _request(app, method: str, path: str, **kwargs) -> httpx.Response:
    """Send one request to ``app`` through an in-process ASGI transport.

    ``method`` and ``path`` are passed to ``AsyncClient.request`` together
    with any extra ``kwargs``; the client is closed before returning.
    """
    async with httpx.AsyncClient(
        transport=httpx.ASGITransport(app=app),
        base_url="http://testserver",
    ) as session:
        reply = await session.request(method, path, **kwargs)
    return reply
def test_inventory_reports_models_metrics_and_discovered_gguf(tmp_path):
    """Check that ``GET /v1/node/inventory`` lists models, metrics and GGUF files.

    The CUDA adapter's ``discover_devices`` and ``get_metrics`` are replaced
    with in-test fakes reporting a single ``gpu:0`` device that has
    ``planner-gguf`` loaded with a queue depth of 1.
    """
    from rolemesh_node_agent.main import create_app

    async def fake_discover_devices():
        return [DeviceRef(kind="gpu", backend="cuda", id="gpu:0")]

    async def fake_get_metrics():
        return [
            DeviceMetrics(
                device=DeviceRef(kind="gpu", backend="cuda", id="gpu:0"),
                loaded_model_id="planner-gguf",
                queue_depth=1,
            )
        ]

    cfg = _node_config(tmp_path)
    app = create_app(cfg)

    try:
        app.state.cuda.discover_devices = fake_discover_devices
        app.state.cuda.get_metrics = fake_get_metrics

        response = asyncio.run(_request(app, "GET", "/v1/node/inventory"))

        # Assert the status before decoding so an error response is reported
        # as a status mismatch rather than as a JSON decoding failure.
        assert response.status_code == 200
        body = response.json()
        assert body["models"][0]["model_id"] == "planner-gguf"
        assert body["metrics"][0]["loaded_model_id"] == "planner-gguf"
        assert body["discovered_gguf"][0]["name"] == "model.gguf"
    finally:
        # Close the app's HTTP client even when an assertion above fails.
        asyncio.run(app.state.http.aclose())
def test_chat_completions_routes_to_local_server_and_streams(tmp_path):
    """Check that chat completions are forwarded to the ensured local server.

    ``ensure_server`` is faked to return ``http://127.0.0.1:9100`` and both
    upstream calls (plain and streaming) are faked to record the base URL and
    payload they receive in ``calls``. One non-streaming and one streaming
    request are sent, and the recorded values are asserted afterwards.
    """
    from rolemesh_node_agent.main import create_app

    # Arguments seen by the fakes, keyed by a descriptive name.
    calls = {}

    async def fake_discover_devices():
        return [DeviceRef(kind="gpu", backend="cuda", id="gpu:0")]

    async def fake_ensure_server(device, *, model_path, model_id, server_args):
        calls["device"] = device.id
        calls["model_path"] = model_path
        calls["model_id"] = model_id
        return "http://127.0.0.1:9100"

    async def fake_chat(base_url, payload):
        calls["base_url"] = base_url
        calls["payload"] = payload
        return {"id": "node-cmpl", "choices": [{"message": {"role": "assistant", "content": "ok"}}]}

    async def fake_stream(base_url, payload):
        calls["stream_base_url"] = base_url
        calls["stream_payload"] = payload
        yield b"data: first\n\n"
        yield b"data: [DONE]\n\n"

    cfg = _node_config(tmp_path)
    app = create_app(cfg)

    try:
        app.state.cuda.discover_devices = fake_discover_devices
        app.state.cuda.ensure_server = fake_ensure_server
        app.state.upstream.chat_completions = fake_chat
        app.state.upstream.stream_chat_completions = fake_stream

        response = asyncio.run(
            _request(
                app,
                "POST",
                "/v1/chat/completions",
                json={
                    "model": "planner-gguf",
                    "messages": [{"role": "user", "content": "hello"}],
                },
            )
        )
        stream_response = asyncio.run(
            _request(
                app,
                "POST",
                "/v1/chat/completions",
                json={
                    "model": "planner-gguf",
                    "stream": True,
                    "messages": [{"role": "user", "content": "hello"}],
                },
            )
        )

        assert response.status_code == 200
        assert response.json()["choices"][0]["message"]["content"] == "ok"
        assert calls["device"] == "gpu:0"
        assert calls["base_url"] == "http://127.0.0.1:9100"
        assert "data: first" in stream_response.text
        assert calls["stream_base_url"] == "http://127.0.0.1:9100"
    finally:
        # Close the app's HTTP client even when a request or assertion fails.
        asyncio.run(app.state.http.aclose())
def test_chat_completions_returns_503_when_server_startup_fails(tmp_path):
    """Check that a ``ServerStartupError`` from ``ensure_server`` yields a 503.

    ``ensure_server`` is faked to raise ``ServerStartupError``; the response
    to a chat completion request must then have status 503 and the error
    code ``server_startup_error``.
    """
    from rolemesh_node_agent.adapters.cuda import ServerStartupError
    from rolemesh_node_agent.main import create_app

    async def fake_discover_devices():
        return [DeviceRef(kind="gpu", backend="cuda", id="gpu:0")]

    async def fake_ensure_server(device, *, model_path, model_id, server_args):
        raise ServerStartupError("Timed out waiting for model load")

    cfg = _node_config(tmp_path)
    app = create_app(cfg)

    try:
        app.state.cuda.discover_devices = fake_discover_devices
        app.state.cuda.ensure_server = fake_ensure_server

        response = asyncio.run(
            _request(
                app,
                "POST",
                "/v1/chat/completions",
                json={
                    "model": "planner-gguf",
                    "messages": [{"role": "user", "content": "hello"}],
                },
            )
        )

        assert response.status_code == 503
        assert response.json()["error"]["code"] == "server_startup_error"
    finally:
        # Close the app's HTTP client even when the request or an assertion fails.
        asyncio.run(app.state.http.aclose())