From 0226f7526d05cdb8d6512467b236c0079c1d99ea Mon Sep 17 00:00:00 2001
From: welsberr <welsberr@gmail.com>
Date: Mon, 16 Mar 2026 22:35:09 -0400
Subject: [PATCH] Clarified model weight selection in different modes

---
 README.md                       | 56 +++++++++++++++++++++++++++++++++
 configs/models.example.yaml     |  2 ++
 configs/node_agent.example.yaml |  1 +
 docs/CONFIG.md                  | 45 ++++++++++++++++++++++++++
 docs/NODE_AGENT.md              | 15 +++++++++
 5 files changed, 119 insertions(+)

diff --git a/README.md b/README.md
index a5fb22f..3a7f3f8 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,59 @@ Examples of project-specific roles:
 
 If your workflow changes, update the `models:` section in config rather than treating the example roles as required.
 
+## Where model weights are defined
+
+There are two different patterns in this project, and the model-weight location is defined in different places depending on which one you use.
+
+### Proxy mode
+
+In gateway proxy mode, the gateway does **not** point directly to a GGUF or other weight file.
+It only points to an upstream inference server:
+
+```yaml
+models:
+  planner:
+    type: proxy
+    proxy_url: http://127.0.0.1:8011
+```
+
+In that setup, the actual model weights are chosen by the upstream server itself.
+Examples:
+
+- `llamafile --server -m /path/to/model.gguf ...`
+- `llama-server -m /path/to/model.gguf ...`
+- Ollama, which picks the model by name from the request's `model` field (for example, injected via `defaults.model: some-model-name`)
+
+So in proxy mode:
+- RoleMesh alias `planner` -> upstream server at `proxy_url`
+- upstream server -> actual weight file or model name
+
+| Upstream type | Where weights/model are chosen | What RoleMesh config provides |
+| --- | --- | --- |
+| `llamafile --server` | CLI `-m /path/to/model.gguf` when the server starts | `proxy_url` |
+| `llama-server` | CLI `-m /path/to/model.gguf` when the server starts | `proxy_url` |
+| Ollama OpenAI-compatible API | request body `model`, often injected via `defaults.model` | `proxy_url` plus optional `defaults.model` |
+
+### Node-agent mode
+
+In node-agent mode, the weight file is defined explicitly in the node-agent config:
+
+```yaml
+models:
+  - model_id: "planner-gguf"
+    path: "/models/SomePlannerModel.Q5_K_M.gguf"
+    roles: ["planner"]
+```
+
+In that setup:
+- `model_id` is the model name exposed by the node agent
+- `path` is the actual GGUF weight file to load
+- `roles` are the role labels this model can serve when the node is used with discovery
+
+So in node-agent mode:
+- node-agent `model_id` -> exact weight file path via `path`
+- gateway `type: discovered` alias -> node role -> model loaded by the node agent from `path`
+
 ## Quick Start
 
 This is the fastest path to a working local setup.
@@ -89,6 +142,7 @@ models:
 Save that as `configs/models.yaml`.
 
 You are not limited to `planner` and `writer`. Those are just placeholders for whatever roles your project needs.
+In this proxy example, the actual weight files are defined by the two backend processes started in step 2, not by the gateway config.
 
 ### 4. Run the gateway
 
@@ -156,6 +210,8 @@ Example launch:
 ./llamafile --server -m /path/to/model.gguf --host 127.0.0.1 --port 8011 --nobrowser
 ```
 
+In this case, the `-m /path/to/model.gguf` argument selects the actual weights, and RoleMesh only points to that running server.
+
 ### llama.cpp / llama-server
 
 - Verified live through the RoleMesh Node Agent on NVIDIA GPUs
diff --git a/configs/models.example.yaml b/configs/models.example.yaml
index 7e67a89..00b0d02 100644
--- a/configs/models.example.yaml
+++ b/configs/models.example.yaml
@@ -19,6 +19,8 @@ auth:
 #  - type: discovered  (resolved from registered nodes by role)
 # The names under "models" are project-defined role aliases, not a fixed built-in list.
 # Rename or replace planner/writer/coder/reviewer with whatever your workflow needs.
+# In proxy mode, the actual weight file is chosen by the upstream server behind proxy_url.
+# In discovered mode, the actual weight file is chosen on the node side (for example via node-agent models[].path).
 models:
   planner:
     type: proxy
diff --git a/configs/node_agent.example.yaml b/configs/node_agent.example.yaml
index e16a6c5..96e2211 100644
--- a/configs/node_agent.example.yaml
+++ b/configs/node_agent.example.yaml
@@ -18,6 +18,7 @@ model_roots:
 
 models:
   - model_id: "planner-gguf"
+    # path is the exact GGUF file that this model_id will load when requested
     path: "/models/SomePlannerModel.Q5_K_M.gguf"
     roles: ["planner"]
     default_ctx: 8192
diff --git a/docs/CONFIG.md b/docs/CONFIG.md
index 8a11aa5..4ad649f 100644
--- a/docs/CONFIG.md
+++ b/docs/CONFIG.md
@@ -51,6 +51,51 @@ models:
     proxy_url: http://127.0.0.1:8013
 ```
 
+## Where the actual model weights are selected
+
+This depends on the backend pattern.
+
+### For `type: proxy`
+
+The gateway alias does **not** point directly to a weight file. It points to an already-running inference server:
+
+```yaml
+models:
+  writer:
+    type: proxy
+    proxy_url: http://127.0.0.1:8012
+```
+
+The actual model weights are chosen by that upstream server, not by RoleMesh Gateway.
+
+Examples:
+- `llamafile --server -m /path/to/model.gguf ...`
+- `llama-server -m /path/to/model.gguf ...`
+- Ollama, which picks the model by name from the request's `model` field (for example, injected via `defaults.model: dolphin3:latest`)
+
+| Upstream type | Where weights/model are chosen | RoleMesh fields involved |
+| --- | --- | --- |
+| `llamafile --server` | backend startup CLI, usually `-m /path/to/model.gguf` | `proxy_url` |
+| `llama-server` | backend startup CLI, usually `-m /path/to/model.gguf` | `proxy_url` |
+| Ollama | request JSON `model`, optionally injected by the gateway | `proxy_url`, `defaults.model` |
+
+### For `type: discovered`
+
+The gateway still does not point directly to a weight file. It points to a role served by a registered node.
+The actual weight file is defined on the node side, usually in the node-agent config:
+
+```yaml
+models:
+  - model_id: "planner-gguf"
+    path: "/models/SomePlannerModel.Q5_K_M.gguf"
+    roles: ["planner"]
+```
+
+In that setup:
+- gateway alias -> discovered role
+- discovered role -> registered node
+- node-agent `path` -> actual weight file on disk
+
 ## Proxy models
 
 Route to a fixed upstream (any host reachable from the gateway):
diff --git a/docs/NODE_AGENT.md b/docs/NODE_AGENT.md
index 6a8e5de..0006da5 100644
--- a/docs/NODE_AGENT.md
+++ b/docs/NODE_AGENT.md
@@ -7,6 +7,21 @@ The **RoleMesh Node Agent** runs on each compute host and manages **persistent**
 - register + heartbeat to the Dispatcher/Gateway (`/v1/nodes/register`, `/v1/nodes/heartbeat`)
 - report inventory + utilization (`/v1/node/inventory`)
 
+## Where the weight file is configured
+
+For the node agent, the actual model weights are specified directly in the node-agent config under `models[].path`:
+
+```yaml
+models:
+  - model_id: "planner-gguf"
+    path: "/models/SomePlannerModel.Q5_K_M.gguf"
+    roles: ["planner"]
+```
+
+- `model_id`: name exposed by the node agent API
+- `path`: exact GGUF file to load
+- `roles`: role labels this model can satisfy when the node registers with a gateway
+
 ## Persistent server model
 
 For each GPU device, the node agent starts a dedicated `llama-server` process, pinned via