From 0226f7526d05cdb8d6512467b236c0079c1d99ea Mon Sep 17 00:00:00 2001 From: welsberr Date: Mon, 16 Mar 2026 22:35:09 -0400 Subject: [PATCH] Clarified model weight selection in different modes --- README.md | 56 +++++++++++++++++++++++++++++++++ configs/models.example.yaml | 2 ++ configs/node_agent.example.yaml | 1 + docs/CONFIG.md | 45 ++++++++++++++++++++++++++ docs/NODE_AGENT.md | 15 +++++++++ 5 files changed, 119 insertions(+) diff --git a/README.md b/README.md index a5fb22f..3a7f3f8 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,59 @@ Examples of project-specific roles: If your workflow changes, update the `models:` section in config rather than treating the example roles as required. +## Where model weights are defined + +There are two different patterns in this project, and the model-weight location is defined in different places depending on which one you use. + +### Proxy mode + +In gateway proxy mode, the gateway does **not** point directly to a GGUF or other weight file. +It only points to an upstream inference server: + +```yaml +models: + planner: + type: proxy + proxy_url: http://127.0.0.1:8011 +``` + +In that setup, the actual model weights are chosen by the upstream server itself. +Examples: + +- `llamafile --server -m /path/to/model.gguf ...` +- `llama-server -m /path/to/model.gguf ...` +- Ollama with `defaults.model: some-model-name` + +So in proxy mode: +- RoleMesh alias `planner` -> upstream server at `proxy_url` +- upstream server -> actual weight file or model name + +| Upstream type | Where weights/model are chosen | What RoleMesh config provides | +| --- | --- | --- | +| `llamafile --server` | CLI `-m /path/to/model.gguf` when the server starts | `proxy_url` | +| `llama-server` | CLI `-m /path/to/model.gguf` when the server starts | `proxy_url` | +| Ollama OpenAI-compatible API | request body `model`, often injected via `defaults.model` | `proxy_url` plus optional `defaults.model` | + +### Node-agent mode + +In node-agent mode, the weight file is defined explicitly in the node-agent config: + +```yaml +models: + - model_id: "planner-gguf" + path: "/models/SomePlannerModel.Q5_K_M.gguf" + roles: ["planner"] +``` + +In that setup: +- `model_id` is the model name exposed by the node agent +- `path` is the actual GGUF weight file to load +- `roles` are the role labels that node can serve if used with discovery + +So in node-agent mode: +- node-agent `model_id` -> exact weight file path via `path` +- gateway discovered alias -> node role -> node-agent model load + ## Quick Start This is the fastest path to a working local setup. @@ -89,6 +142,7 @@ models: Save that as `configs/models.yaml`. You are not limited to `planner` and `writer`. Those are just placeholders for whatever roles your project needs. +In this proxy example, the actual weight files are defined by the two backend processes started in step 2, not by the gateway config. ### 4. Run the gateway @@ -156,6 +210,8 @@ Example launch: ./llamafile --server -m /path/to/model.gguf --host 127.0.0.1 --port 8011 --nobrowser ``` +In this case, `/path/to/model.gguf` is where the actual weights are chosen, and RoleMesh only points to that running server. + ### llama.cpp / llama-server - Verified live through the RoleMesh Node Agent on NVIDIA GPUs diff --git a/configs/models.example.yaml b/configs/models.example.yaml index 7e67a89..00b0d02 100644 --- a/configs/models.example.yaml +++ b/configs/models.example.yaml @@ -19,6 +19,8 @@ auth: # - type: discovered (resolved from registered nodes by role) # The names under "models" are project-defined role aliases, not a fixed built-in list. # Rename or replace planner/writer/coder/reviewer with whatever your workflow needs. +# In proxy mode, the actual weight file is chosen by the upstream server behind proxy_url. +# In discovered mode, the actual weight file is chosen on the node side (for example via node-agent models[].path). models: planner: type: proxy diff --git a/configs/node_agent.example.yaml b/configs/node_agent.example.yaml index e16a6c5..96e2211 100644 --- a/configs/node_agent.example.yaml +++ b/configs/node_agent.example.yaml @@ -18,6 +18,7 @@ model_roots: models: - model_id: "planner-gguf" + # path is the exact GGUF file that this model_id will load when requested path: "/models/SomePlannerModel.Q5_K_M.gguf" roles: ["planner"] default_ctx: 8192 diff --git a/docs/CONFIG.md b/docs/CONFIG.md index 8a11aa5..4ad649f 100644 --- a/docs/CONFIG.md +++ b/docs/CONFIG.md @@ -51,6 +51,51 @@ models: proxy_url: http://127.0.0.1:8013 ``` +## Where the actual model weights are selected + +This depends on the backend pattern. + +### For `type: proxy` + +The gateway alias does **not** point directly to a weight file. It points to an already-running inference server: + +```yaml +models: + writer: + type: proxy + proxy_url: http://127.0.0.1:8012 +``` + +The actual model weights are chosen by that upstream server, not by RoleMesh Gateway. + +Examples: +- `llamafile --server -m /path/to/model.gguf ...` +- `llama-server -m /path/to/model.gguf ...` +- Ollama with `defaults.model: dolphin3:latest` + +| Upstream type | Where weights/model are chosen | RoleMesh fields involved | +| --- | --- | --- | +| `llamafile --server` | backend startup CLI, usually `-m /path/to/model.gguf` | `proxy_url` | +| `llama-server` | backend startup CLI, usually `-m /path/to/model.gguf` | `proxy_url` | +| Ollama | request JSON `model`, optionally injected by the gateway | `proxy_url`, `defaults.model` | + +### For `type: discovered` + +The gateway still does not point directly to a weight file. It points to a role served by a registered node. +The actual weight file is defined on the node side, usually in the node-agent config: + +```yaml +models: + - model_id: "planner-gguf" + path: "/models/SomePlannerModel.Q5_K_M.gguf" + roles: ["planner"] +``` + +In that setup: +- gateway alias -> discovered role +- discovered role -> registered node +- node-agent `path` -> actual weight file on disk + ## Proxy models Route to a fixed upstream (any host reachable from the gateway): diff --git a/docs/NODE_AGENT.md b/docs/NODE_AGENT.md index 6a8e5de..0006da5 100644 --- a/docs/NODE_AGENT.md +++ b/docs/NODE_AGENT.md @@ -7,6 +7,21 @@ The **RoleMesh Node Agent** runs on each compute host and manages **persistent** - register + heartbeat to the Dispatcher/Gateway (`/v1/nodes/register`, `/v1/nodes/heartbeat`) - report inventory + utilization (`/v1/node/inventory`) +## Where the weight file is configured + +For the node agent, the actual model weights are specified directly in the node-agent config under `models[].path`: + +```yaml +models: + - model_id: "planner-gguf" + path: "/models/SomePlannerModel.Q5_K_M.gguf" + roles: ["planner"] +``` + +- `model_id`: name exposed by the node agent API +- `path`: exact GGUF file to load +- `roles`: role labels this model can satisfy when the node registers with a gateway + ## Persistent server model For each GPU device, the node agent starts a dedicated `llama-server` process, pinned via