From 8201dd83ee3ab6fb623e591fd8a835d9721d3316 Mon Sep 17 00:00:00 2001 From: welsberr Date: Fri, 8 May 2026 01:18:06 -0400 Subject: [PATCH] Support separate bibliography roots in review export --- src/groundrecall/review_export.py | 23 ++++++++++--- tests/test_groundrecall_review_workspace.py | 37 +++++++++++++++++++++ 2 files changed, 56 insertions(+), 4 deletions(-) diff --git a/src/groundrecall/review_export.py b/src/groundrecall/review_export.py index 1ac0aea..23a97c0 100644 --- a/src/groundrecall/review_export.py +++ b/src/groundrecall/review_export.py @@ -177,16 +177,27 @@ def _resolve_source_root(import_dir: Path, source_root: str) -> str: return str((import_dir.parent.parent / root).resolve()) +def _resolve_bibliography_root(import_dir: Path, manifest: dict[str, Any], resolved_source_root: str) -> str: + bibliography_root = str(manifest.get("bibliography_root", "")).strip() + if not bibliography_root: + return resolved_source_root + root = Path(bibliography_root) + if root.is_absolute(): + return str(root) + return str((import_dir.parent.parent / root).resolve()) + + def _artifact_citation_payloads( artifacts: list[dict[str, Any]], *, source_root: str, + bibliography_root: str | None = None, ) -> tuple[list[dict[str, Any]], dict[str, dict[str, Any]]]: extract_references, backends = _load_citegeist_extract() artifact_payloads: list[dict[str, Any]] = [] summaries: dict[str, dict[str, Any]] = {} root = Path(source_root) if source_root else None - bibliography_index = load_bibliography_index(source_root) if source_root else {} + bibliography_index = load_bibliography_index(bibliography_root or source_root) if (bibliography_root or source_root) else {} for artifact in artifacts: path = Path(source_root) / artifact["path"] if root is not None else None @@ -280,14 +291,16 @@ def build_citation_review_entries_from_import(import_dir: str | Path) -> list[Ci base = Path(import_dir) manifest = _read_json(base / "manifest.json") resolved_source_root = _resolve_source_root(base, manifest.get("source_root", "")) + resolved_bibliography_root = _resolve_bibliography_root(base, manifest, resolved_source_root) artifacts = _read_jsonl(base / "artifacts.jsonl") observations = _read_jsonl(base / "observations.jsonl") claims = _read_jsonl(base / "claims.jsonl") - bibliography_index = load_bibliography_index(resolved_source_root) + bibliography_index = load_bibliography_index(resolved_bibliography_root) artifact_payloads, _ = _artifact_citation_payloads( artifacts, source_root=resolved_source_root, + bibliography_root=resolved_bibliography_root, ) observations_by_id = {item["observation_id"]: item for item in observations} artifact_claim_links: dict[str, dict[str, set[str]]] = defaultdict(lambda: {"claim_ids": set(), "concept_ids": set()}) @@ -357,7 +370,8 @@ def build_citation_review_entries_from_import(import_dir: str | Path) -> list[Ci def _build_import_review_payload(session: ReviewSession, import_dir: Path) -> dict[str, Any]: manifest = _read_json(import_dir / "manifest.json") resolved_source_root = _resolve_source_root(import_dir, manifest.get("source_root", "")) - bibliography_index = load_bibliography_index(resolved_source_root) if resolved_source_root else {} + resolved_bibliography_root = _resolve_bibliography_root(import_dir, manifest, resolved_source_root) + bibliography_index = load_bibliography_index(resolved_bibliography_root) if resolved_bibliography_root else {} lint_payload = _read_json(import_dir / "lint_findings.json") queue_payload = _read_json(import_dir / "review_queue.json") graph_payload = _read_json(import_dir / "graph_diagnostics.json") @@ -377,6 +391,7 @@ def _build_import_review_payload(session: ReviewSession, import_dir: Path) -> di artifact_citations, artifact_citation_summary = _artifact_citation_payloads( artifacts, source_root=resolved_source_root, + bibliography_root=resolved_bibliography_root, ) artifact_by_id = {item["artifact_id"]: item for item in artifacts} queue_by_candidate_id = { @@ -510,7 +525,7 @@ def _build_import_review_payload(session: ReviewSession, import_dir: Path) -> di ], "concept_reviews": concept_reviews, "citation_reviews": [entry.model_dump() for entry in session.citation_reviews], - "bibliography": bibliography_summary_payload(resolved_source_root), + "bibliography": bibliography_summary_payload(resolved_bibliography_root), "graph_diagnostics": graph_payload, "citations": { "enabled": True, diff --git a/tests/test_groundrecall_review_workspace.py b/tests/test_groundrecall_review_workspace.py index c23e5f6..9ed6583 100644 --- a/tests/test_groundrecall_review_workspace.py +++ b/tests/test_groundrecall_review_workspace.py @@ -139,3 +139,40 @@ def test_review_workspace_surfaces_local_bibliography_support_suggestions(tmp_pa assert suggestions assert suggestions[0]["citation_key"] == "kimura1968evolutionary" assert "abstract" in suggestions[0]["reason"].lower() or "title" in suggestions[0]["reason"].lower() + + +def test_review_workspace_can_use_separate_bibliography_root(tmp_path: Path) -> None: + root = tmp_path / "pilot" + source_root = root / "source" + bib_root = root / "bibliography" + (source_root / "wiki").mkdir(parents=True) + bib_root.mkdir(parents=True) + (source_root / "wiki" / "drift.md").write_text( + "# Drift\n\n" + "- Random genetic drift can dominate allele-frequency change in small populations.\n", + encoding="utf-8", + ) + (bib_root / "refs.bib").write_text( + "@article{kimura1968evolutionary,\n" + " author = {Motoo Kimura},\n" + " title = {Evolutionary Rate at the Molecular Level},\n" + " journal = {Nature},\n" + " year = {1968},\n" + " abstract = {The rate of molecular evolution is compatible with neutral changes driven by random genetic drift in populations.}\n" + "}\n", + encoding="utf-8", + ) + + import_result = run_groundrecall_import(source_root, out_root=tmp_path / "imports", mode="quick", import_id="separate-bib-root") + manifest_path = import_result.out_dir / "manifest.json" + manifest = json.loads(manifest_path.read_text(encoding="utf-8")) + manifest["bibliography_root"] = str(bib_root) + manifest_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8") + + workspace = GroundRecallReviewWorkspace(import_result.out_dir) + payload = workspace.load_review_data() + concept_review = next(item for item in payload["concept_reviews"] if item["concept_id"] == "drift") + suggestions = concept_review["top_claims"][0]["support_suggestions"] + assert payload["bibliography"]["entry_count"] == 1 + assert suggestions + assert suggestions[0]["citation_key"] == "kimura1968evolutionary"