From 8201dd83ee3ab6fb623e591fd8a835d9721d3316 Mon Sep 17 00:00:00 2001
From: welsberr <welsberr@gmail.com>
Date: Fri, 8 May 2026 01:18:06 -0400
Subject: [PATCH] Support separate bibliography roots in review export

---
 src/groundrecall/review_export.py           | 23 ++++++++++---
 tests/test_groundrecall_review_workspace.py | 37 +++++++++++++++++++++
 2 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/src/groundrecall/review_export.py b/src/groundrecall/review_export.py
index 1ac0aea..23a97c0 100644
--- a/src/groundrecall/review_export.py
+++ b/src/groundrecall/review_export.py
@@ -177,16 +177,27 @@ def _resolve_source_root(import_dir: Path, source_root: str) -> str:
     return str((import_dir.parent.parent / root).resolve())
 
 
+def _resolve_bibliography_root(import_dir: Path, manifest: dict[str, Any], resolved_source_root: str) -> str:
+    """Return the manifest's ``bibliography_root`` as a path string, or *resolved_source_root* when it is missing, null, or blank."""
+    bibliography_root = str(manifest.get("bibliography_root") or "").strip()  # `or ""` keeps a JSON null from becoming the literal "None"
+    if not bibliography_root:
+        return resolved_source_root
+    root = Path(bibliography_root)
+    # Absolute roots pass through untouched; relative ones use the same base directory as _resolve_source_root.
+    return str(root if root.is_absolute() else (import_dir.parent.parent / root).resolve())
+
+
 def _artifact_citation_payloads(
     artifacts: list[dict[str, Any]],
     *,
     source_root: str,
+    bibliography_root: str | None = None,
 ) -> tuple[list[dict[str, Any]], dict[str, dict[str, Any]]]:
     extract_references, backends = _load_citegeist_extract()
     artifact_payloads: list[dict[str, Any]] = []
     summaries: dict[str, dict[str, Any]] = {}
     root = Path(source_root) if source_root else None
-    bibliography_index = load_bibliography_index(source_root) if source_root else {}
+    bibliography_index = load_bibliography_index(bibliography_root or source_root) if (bibliography_root or source_root) else {}
 
     for artifact in artifacts:
         path = Path(source_root) / artifact["path"] if root is not None else None
@@ -280,14 +291,16 @@ def build_citation_review_entries_from_import(import_dir: str | Path) -> list[Ci
     base = Path(import_dir)
     manifest = _read_json(base / "manifest.json")
     resolved_source_root = _resolve_source_root(base, manifest.get("source_root", ""))
+    resolved_bibliography_root = _resolve_bibliography_root(base, manifest, resolved_source_root)
     artifacts = _read_jsonl(base / "artifacts.jsonl")
     observations = _read_jsonl(base / "observations.jsonl")
     claims = _read_jsonl(base / "claims.jsonl")
-    bibliography_index = load_bibliography_index(resolved_source_root)
+    bibliography_index = load_bibliography_index(resolved_bibliography_root)
 
     artifact_payloads, _ = _artifact_citation_payloads(
         artifacts,
         source_root=resolved_source_root,
+        bibliography_root=resolved_bibliography_root,
     )
     observations_by_id = {item["observation_id"]: item for item in observations}
     artifact_claim_links: dict[str, dict[str, set[str]]] = defaultdict(lambda: {"claim_ids": set(), "concept_ids": set()})
@@ -357,7 +370,8 @@ def build_citation_review_entries_from_import(import_dir: str | Path) -> list[Ci
 def _build_import_review_payload(session: ReviewSession, import_dir: Path) -> dict[str, Any]:
     manifest = _read_json(import_dir / "manifest.json")
     resolved_source_root = _resolve_source_root(import_dir, manifest.get("source_root", ""))
-    bibliography_index = load_bibliography_index(resolved_source_root) if resolved_source_root else {}
+    resolved_bibliography_root = _resolve_bibliography_root(import_dir, manifest, resolved_source_root)
+    bibliography_index = load_bibliography_index(resolved_bibliography_root) if resolved_bibliography_root else {}
     lint_payload = _read_json(import_dir / "lint_findings.json")
     queue_payload = _read_json(import_dir / "review_queue.json")
     graph_payload = _read_json(import_dir / "graph_diagnostics.json")
@@ -377,6 +391,7 @@ def _build_import_review_payload(session: ReviewSession, import_dir: Path) -> di
     artifact_citations, artifact_citation_summary = _artifact_citation_payloads(
         artifacts,
         source_root=resolved_source_root,
+        bibliography_root=resolved_bibliography_root,
     )
     artifact_by_id = {item["artifact_id"]: item for item in artifacts}
     queue_by_candidate_id = {
@@ -510,7 +525,7 @@ def _build_import_review_payload(session: ReviewSession, import_dir: Path) -> di
         ],
         "concept_reviews": concept_reviews,
         "citation_reviews": [entry.model_dump() for entry in session.citation_reviews],
-        "bibliography": bibliography_summary_payload(resolved_source_root),
+        "bibliography": bibliography_summary_payload(resolved_bibliography_root),
         "graph_diagnostics": graph_payload,
         "citations": {
             "enabled": True,
diff --git a/tests/test_groundrecall_review_workspace.py b/tests/test_groundrecall_review_workspace.py
index c23e5f6..9ed6583 100644
--- a/tests/test_groundrecall_review_workspace.py
+++ b/tests/test_groundrecall_review_workspace.py
@@ -139,3 +139,40 @@ def test_review_workspace_surfaces_local_bibliography_support_suggestions(tmp_pa
     assert suggestions
     assert suggestions[0]["citation_key"] == "kimura1968evolutionary"
     assert "abstract" in suggestions[0]["reason"].lower() or "title" in suggestions[0]["reason"].lower()
+
+
+def test_review_workspace_can_use_separate_bibliography_root(tmp_path: Path) -> None:
+    # The .bib file lives beside, not inside, the imported source tree.
+    pilot_dir = tmp_path / "pilot"
+    source_dir, bibliography_dir = pilot_dir / "source", pilot_dir / "bibliography"
+    for directory in (source_dir / "wiki", bibliography_dir):
+        directory.mkdir(parents=True)
+    (source_dir / "wiki" / "drift.md").write_text(
+        "# Drift\n\n"
+        "- Random genetic drift can dominate allele-frequency change in small populations.\n",
+        encoding="utf-8",
+    )
+    refs_file = bibliography_dir / "refs.bib"
+    refs_file.write_text(
+        "@article{kimura1968evolutionary,\n"
+        "  author = {Motoo Kimura},\n"
+        "  title = {Evolutionary Rate at the Molecular Level},\n"
+        "  journal = {Nature},\n"
+        "  year = {1968},\n"
+        "  abstract = {The rate of molecular evolution is compatible with neutral changes driven by random genetic drift in populations.}\n"
+        "}\n",
+        encoding="utf-8",
+    )
+
+    imported = run_groundrecall_import(source_dir, out_root=tmp_path / "imports", mode="quick", import_id="separate-bib-root")
+    manifest_file = imported.out_dir / "manifest.json"
+    manifest_data = json.loads(manifest_file.read_text(encoding="utf-8"))
+    manifest_data["bibliography_root"] = str(bibliography_dir)
+    manifest_file.write_text(json.dumps(manifest_data, indent=2), encoding="utf-8")
+
+    review_data = GroundRecallReviewWorkspace(imported.out_dir).load_review_data()
+    drift_review = next(item for item in review_data["concept_reviews"] if item["concept_id"] == "drift")
+    drift_suggestions = drift_review["top_claims"][0]["support_suggestions"]
+    assert review_data["bibliography"]["entry_count"] == 1
+    assert drift_suggestions
+    assert drift_suggestions[0]["citation_key"] == "kimura1968evolutionary"