LLM verify + fixes + tests

welsberr 2026-04-21 03:15:33 -04:00
parent 65fde034e1
commit 4894341ba8
13 changed files with 751 additions and 10 deletions

.gitignore (vendored, +2)
View File

@@ -6,3 +6,5 @@ __pycache__/
*.egg-info/
library.sqlite3
ops/
.codex
SESSION_*

View File

@@ -1,7 +1,7 @@
PYTHONPATH_SRC=PYTHONPATH=src
VENV_PYTHON=.venv/bin/python
-.PHONY: test test-live live-smoke validate-talkorigins
+.PHONY: test test-live live-smoke live-verify-llm-smoke validate-talkorigins
test:
$(PYTHONPATH_SRC) $(VENV_PYTHON) -m pytest -q
@@ -12,5 +12,8 @@ test-live:
live-smoke:
CITEGEIST_SOURCE_CACHE=.cache/citegeist $(PYTHONPATH_SRC) $(VENV_PYTHON) scripts/live_smoke.py
live-verify-llm-smoke:
$(PYTHONPATH_SRC) $(VENV_PYTHON) scripts/live_verify_llm_smoke.py
validate-talkorigins:
$(PYTHONPATH_SRC) $(VENV_PYTHON) -m citegeist validate-talkorigins talkorigins-out/talkorigins_manifest.json

View File

@@ -172,6 +172,7 @@ PYTHONPATH=src .venv/bin/python -m citegeist compare-extract references.txt --ba
PYTHONPATH=src .venv/bin/python -m citegeist compare-extract references.txt --backend heuristic --backend grobid --summary --output compare-summary.json
PYTHONPATH=src .venv/bin/python -m citegeist compare-extract references.txt --backend heuristic --backend grobid --summary --max-rows-with-differences 0 --output compare-check.json
PYTHONPATH=src .venv/bin/python -m citegeist verify --string '"Graph-first bibliography augmentation" Smith 2024' --context "citation graphs" --format json
PYTHONPATH=src .venv/bin/python -m citegeist verify --string 'Evans 1960' --context "bottlenose dolphin echolocation" --llm --llm-base-url http://localhost:11434 --llm-model qwen3 --llm-role both --format json
PYTHONPATH=src .venv/bin/python -m citegeist verify --bib draft.bib --output verified.bib
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve smith2024graphs
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --preview --limit 25
@@ -257,6 +258,58 @@ The built-in extraction backends are:
The backend interface exists so future GROBID- or other parser adapters can be registered without replacing the local parser or changing the CLI contract.
## LLM-Assisted Verify
`citegeist verify` can optionally use a local LLM for two bounded tasks:
- `expand`: infer missing bibliographic clues from free text and context
- `rerank`: advisory reranking of already fetched resolver candidates
Example:
```bash
PYTHONPATH=src .venv/bin/python -m citegeist verify \
--string 'Evans 1960' \
--context "bottlenose dolphin echolocation" \
--llm \
--llm-base-url http://localhost:11434 \
--llm-model qwen3 \
--llm-role both \
--format json
```
Supported local endpoint styles:
- OpenAI-compatible APIs such as `http://localhost:11434/v1`
- Ollama native chat APIs such as `http://localhost:11434`
For the current local GenieHive setup, this also works directly:
```bash
PYTHONPATH=src .venv/bin/python -m citegeist verify \
--string 'Evans 1960' \
--context "bottlenose dolphin echolocation" \
--llm \
--llm-base-url http://127.0.0.1:8800/v1 \
--llm-api-key change-me-client-key \
--llm-model general_assistant \
--llm-role both \
--format json
```
There is also a local smoke script that exercises only the LLM helper path:
```bash
make live-verify-llm-smoke
```
The script targets the local endpoint by default and honors the `CITEGEIST_VERIFY_LLM_BASE_URL`, `CITEGEIST_VERIFY_LLM_MODEL`, `CITEGEIST_VERIFY_LLM_API_KEY`, and `CITEGEIST_VERIFY_LLM_PROVIDER` environment variables when set.
Safety constraints:
- the LLM is never trusted to invent DOIs or other identifiers
- the LLM only fills missing query clues or suggests a candidate ordering
- `exact` status still requires verified resolver evidence, not LLM output
- if the LLM call fails or returns unusable JSON, `verify` falls back to the normal resolver-only path
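The same helpers can also be driven from Python. The sketch below is illustrative only: it assumes `src/` is on `PYTHONPATH` and reuses the local endpoint and model from the example above; substitute your own values.
```python
from citegeist.llm_verify import VerificationLlmConfig
from citegeist.verify import BibliographyVerifier

# Illustrative endpoint and model; any OpenAI-compatible or Ollama server works.
llm_config = VerificationLlmConfig(
    base_url="http://localhost:11434",
    model="qwen3",
    role="both",  # "expand", "rerank", or "both"
)
verifier = BibliographyVerifier(llm_config=llm_config)

# If the endpoint is unreachable or returns unusable JSON, the verifier
# silently falls back to the resolver-only path, so this still returns a result.
result = verifier.verify_string("Evans 1960", context="bottlenose dolphin echolocation")
print(result.status, result.confidence, result.entry.citation_key)
```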
To compare backend output on the same plaintext references, use `compare-extract`. It aligns entries by ordinal/reference block and emits JSON with per-backend payloads plus a `differing_fields` summary for each row. Add `--summary` when you want a compact evaluation artifact with disagreement counts by field and backend presence counts instead of the full row-by-row payload. Add `--max-rows-with-differences` and/or `--max-field-difference-count` when you want CI-style failure thresholds; the command will emit the summary JSON and return a nonzero exit code if the limits are exceeded.
For regression-oriented parser work, keep a small curated plaintext fixture set and run `compare-extract` against multiple backends before changing heuristics. That makes backend disagreement explicit and gives you a stable review artifact for parser changes.

View File

@@ -0,0 +1,97 @@
from __future__ import annotations
import argparse
import json
import os
from citegeist.bibtex import BibEntry
from citegeist.llm_verify import VerificationLlmClient, VerificationLlmConfig
def build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description="Run live LLM verify smoke checks against a local OpenAI-compatible endpoint")
parser.add_argument(
"--base-url",
default=os.environ.get("CITEGEIST_VERIFY_LLM_BASE_URL", "http://127.0.0.1:8800/v1"),
help="OpenAI-compatible or Ollama base URL",
)
parser.add_argument(
"--model",
default=os.environ.get("CITEGEIST_VERIFY_LLM_MODEL", "general_assistant"),
help="Model or route ID exposed by the local endpoint",
)
parser.add_argument(
"--api-key",
default=os.environ.get("CITEGEIST_VERIFY_LLM_API_KEY", "change-me-client-key"),
help="Optional API key for the local endpoint",
)
parser.add_argument(
"--provider",
default=os.environ.get("CITEGEIST_VERIFY_LLM_PROVIDER", "auto"),
choices=["auto", "openai", "ollama-native"],
help="Endpoint protocol style",
)
return parser
def main() -> int:
args = build_parser().parse_args()
client = VerificationLlmClient()
config = VerificationLlmConfig(
base_url=args.base_url,
model=args.model,
api_key=args.api_key,
provider=args.provider,
role="both",
)
analysis = client.analyze_query(
config,
"Evans 1960",
"marine mammals; bottlenose dolphin echolocation",
)
rerank = client.rerank_candidates(
config,
{"title": "", "authors": ["Evans"], "year": "1960", "venue": ""},
"bottlenose dolphin echolocation",
[
BibEntry(
entry_type="article",
citation_key="candidate_a",
fields={
"author": "Doe, Jane",
"title": "General Marine Biology Survey",
"year": "1960",
"journal": "Marine Science",
},
),
BibEntry(
entry_type="article",
citation_key="candidate_b",
fields={
"author": "Evans, William",
"title": "Echolocation by marine dolphins",
"year": "1960",
"journal": "Journal of the Acoustical Society",
},
),
],
)
print(
json.dumps(
{
"base_url": args.base_url,
"model": args.model,
"analysis": analysis,
"rerank": rerank,
},
indent=2,
sort_keys=True,
)
)
return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -13,6 +13,7 @@ from .extract import (
summarize_extraction_comparison,
)
from .harvest import OaiMetadataFormat, OaiPmhHarvester, OaiSet
from .llm_verify import VerificationLlmClient, VerificationLlmConfig
from .resolve import MetadataResolver, merge_entries, merge_entries_with_conflicts
from .sources import SourceClient
from .storage import BibliographyStore
@@ -34,6 +35,8 @@ __all__ = [
"OaiMetadataFormat",
"OaiSet",
"SourceClient",
"VerificationLlmClient",
"VerificationLlmConfig",
"VerificationMatch",
"VerificationResult",
"available_extraction_backends",

View File

@@ -20,6 +20,7 @@ from .extract import (
summarize_extraction_comparison,
)
from .harvest import OaiPmhHarvester
from .llm_verify import VerificationLlmConfig
from .resolve import MetadataResolver, merge_entries_with_conflicts
from .storage import BibliographyStore
from .verify import BibliographyVerifier, render_verification_results
@@ -145,6 +146,22 @@ def build_parser() -> argparse.ArgumentParser:
verify_group.add_argument("--bib", help="Path to a BibTeX file whose entries should be verified")
verify_parser.add_argument("--context", default="", help="Optional topic context used for scoring")
verify_parser.add_argument("--limit", type=int, default=5, help="Maximum candidates to inspect per input")
verify_parser.add_argument("--llm", action="store_true", help="Enable optional local LLM assistance for verify")
verify_parser.add_argument("--llm-base-url", help="OpenAI-compatible or Ollama base URL for local LLM assistance")
verify_parser.add_argument("--llm-model", help="Model ID for local LLM assistance")
verify_parser.add_argument("--llm-api-key", default="", help="Optional API key for the LLM endpoint")
verify_parser.add_argument(
"--llm-provider",
choices=["auto", "openai", "ollama-native"],
default="auto",
help="LLM API style; auto treats `/v1` endpoints as OpenAI-compatible",
)
verify_parser.add_argument(
"--llm-role",
choices=["expand", "rerank", "both"],
default="both",
help="Use the local LLM for query-clue extraction, candidate reranking, or both",
)
verify_parser.add_argument(
"--format",
choices=["bibtex", "json"],
@@ -715,7 +732,21 @@ def main(argv: list[str] | None = None) -> int:
args.output,
)
if args.command == "verify":
-return _run_verify(args.string, args.list_input, args.bib, args.context, args.limit, args.format, args.output)
return _run_verify(
args.string,
args.list_input,
args.bib,
args.context,
args.limit,
args.format,
args.output,
llm_enabled=args.llm,
llm_base_url=args.llm_base_url,
llm_model=args.llm_model,
llm_api_key=args.llm_api_key,
llm_provider=args.llm_provider,
llm_role=args.llm_role,
)
if args.command == "resolve": if args.command == "resolve":
return _run_resolve(store, args.citation_keys) return _run_resolve(store, args.citation_keys)
if args.command == "resolve-stubs": if args.command == "resolve-stubs":
@ -750,8 +781,6 @@ def main(argv: list[str] | None = None) -> int:
args.rounds, args.rounds,
args.recent_years, args.recent_years,
args.target_recent_entries, args.target_recent_entries,
args.max_expanded_entries,
args.max_expand_seconds,
) )
if args.command == "set-topic-phrase": if args.command == "set-topic-phrase":
return _run_set_topic_phrase(store, args.topic_slug, args.phrase, args.clear) return _run_set_topic_phrase(store, args.topic_slug, args.phrase, args.clear)
@@ -785,6 +814,8 @@ def main(argv: list[str] | None = None) -> int:
args.expansion_rounds,
args.recent_years,
args.target_recent_entries,
args.max_expanded_entries,
args.max_expand_seconds,
)
if args.command == "bootstrap-batch":
return _run_bootstrap_batch(store, Path(args.input))
@@ -1121,8 +1152,27 @@
limit: int,
output_format: str,
output: str | None,
*,
llm_enabled: bool = False,
llm_base_url: str | None = None,
llm_model: str | None = None,
llm_api_key: str = "",
llm_provider: str = "auto",
llm_role: str = "both",
) -> int:
-verifier = BibliographyVerifier()
llm_config = None
if llm_enabled:
if not llm_base_url or not llm_model:
print("--llm requires --llm-base-url and --llm-model", file=sys.stderr)
return 1
llm_config = VerificationLlmConfig(
base_url=llm_base_url,
model=llm_model,
api_key=llm_api_key,
provider=llm_provider,
role=llm_role,
)
verifier = BibliographyVerifier(llm_config=llm_config)
if string_input is not None:
results = [verifier.verify_string(string_input, context=context, limit=limit)]
elif list_input is not None:

View File

@@ -196,7 +196,7 @@ class OpenAlexExpander:
)
results.append(
ExpansionResult(
-source_citation_key=citation_key,
+source_citation_key=source_key,
discovered_citation_key=existing_key or discovered.citation_key,
created_entry=created,
relation_type=relation_type,
@@ -335,7 +335,7 @@ class TopicExpander:
assigned_to_topic=assigned,
)
)
-if target_recent_entries is not None and len(recent_topic_hits) >= target_recent_entries:
+if target_recent_entries is not None and len(recent_hits) >= target_recent_entries:
self.last_run_meta.update({
"stop_reason": "target_recent_entries",
"recent_hits": len(recent_hits),

src/citegeist/llm_verify.py (new file, +218)
View File

@@ -0,0 +1,218 @@
from __future__ import annotations
import json
import re
import urllib.request
from dataclasses import dataclass
from typing import Any, Callable
from .bibtex import BibEntry
DEFAULT_SYSTEM_PROMPT = (
"You are a meticulous bibliography verification assistant. "
"You never invent DOIs, page ranges, venues, or identifiers. "
"You may only suggest missing clues from the provided input and context. "
"When uncertain, return null or an empty list. "
"Always respond with strict JSON matching the requested shape."
)
@dataclass(slots=True)
class VerificationLlmConfig:
base_url: str
model: str
api_key: str = ""
provider: str = "auto"
role: str = "both"
def enabled_for(self, capability: str) -> bool:
return bool(self.base_url and self.model) and self.role in {capability, "both"}
class VerificationLlmClient:
def __init__(
self,
*,
timeout_s: int = 60,
post_json: Callable[[str, dict[str, Any], dict[str, str], int], dict[str, Any]] | None = None,
) -> None:
self.timeout_s = timeout_s
self._post_json = post_json or _default_post_json
def analyze_query(
self,
config: VerificationLlmConfig,
free_text: str,
context: str,
) -> dict[str, Any] | None:
if not config.enabled_for("expand"):
return None
payload = {
"task": "extract_bibliographic_clues",
"input": {"free_text": free_text, "context": context},
"rules": [
"Never invent a DOI or identifier.",
"Only fill clues that plausibly follow from the input and context.",
"Return null for unknown scalar fields.",
],
"schema": {
"type": "object",
"properties": {
"title": {"type": ["string", "null"]},
"authors": {"type": "array", "items": {"type": "string"}},
"year": {"type": ["string", "null"]},
"venue": {"type": ["string", "null"]},
"keywords": {"type": "array", "items": {"type": "string"}},
},
"required": ["authors", "keywords"],
},
}
result = self._chat_json(config, payload)
if not isinstance(result, dict):
return None
authors = [str(value).strip() for value in result.get("authors", []) if str(value).strip()]
keywords = [str(value).strip() for value in result.get("keywords", []) if str(value).strip()]
return {
"title": _optional_string(result.get("title")),
"authors": authors,
"year": _optional_string(result.get("year")),
"venue": _optional_string(result.get("venue")),
"keywords": keywords,
}
def rerank_candidates(
self,
config: VerificationLlmConfig,
query_fields: dict[str, object],
context: str,
candidates: list[BibEntry],
) -> list[int] | None:
if not config.enabled_for("rerank") or not candidates:
return None
payload = {
"task": "rerank_candidates",
"instruction": (
"Return a JSON array of candidate indices sorted best to worst. "
"Do not invent metadata. Prefer candidates that better match the given clues."
),
"input": {
"query_fields": query_fields,
"context": context,
"candidates": [
{
"title": entry.fields.get("title", ""),
"authors": entry.fields.get("author", "").split(" and ") if entry.fields.get("author") else [],
"year": entry.fields.get("year", ""),
"venue": entry.fields.get("journal", "") or entry.fields.get("booktitle", ""),
"doi": entry.fields.get("doi", ""),
}
for entry in candidates[:8]
],
},
}
result = self._chat_json(config, payload)
if not isinstance(result, list):
return None
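# Keep only in-range integer indices; an empty or malformed reply means the reranking advice is ignored.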
indices = [value for value in result if isinstance(value, int) and 0 <= value < len(candidates)]
return indices or None
def _chat_json(self, config: VerificationLlmConfig, payload: dict[str, Any]) -> Any:
try:
if _llm_mode(config.base_url, config.provider) == "openai":
return self._chat_openai(config, payload)
return self._chat_ollama_native(config, payload)
except Exception:
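# Any endpoint, protocol, or parsing failure disables LLM assistance for this call; verify then proceeds resolver-only.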
return None
def _chat_openai(self, config: VerificationLlmConfig, payload: dict[str, Any]) -> Any:
headers = {"Content-Type": "application/json"}
if config.api_key:
headers["Authorization"] = f"Bearer {config.api_key}"
body = {
"model": config.model,
"temperature": 0,
"messages": [
{"role": "system", "content": DEFAULT_SYSTEM_PROMPT},
{"role": "user", "content": json.dumps(payload)},
],
}
data = self._post_json(
config.base_url.rstrip("/") + "/chat/completions",
body,
headers,
self.timeout_s,
)
content = data["choices"][0]["message"]["content"]
return _loads_lenient_json(content)
def _chat_ollama_native(self, config: VerificationLlmConfig, payload: dict[str, Any]) -> Any:
base_url = config.base_url.rstrip("/")
if base_url.endswith("/v1"):
base_url = base_url[:-3]
body = {
"model": config.model,
"messages": [
{"role": "system", "content": DEFAULT_SYSTEM_PROMPT},
{"role": "user", "content": json.dumps(payload)},
],
"options": {"temperature": 0},
"stream": False,
}
headers = {"Content-Type": "application/json"}
if config.api_key:
headers["Authorization"] = f"Bearer {config.api_key}"
data = self._post_json(
base_url + "/api/chat",
body,
headers,
self.timeout_s,
)
content = data["message"]["content"]
return _loads_lenient_json(content)
def _default_post_json(url: str, payload: dict[str, Any], headers: dict[str, str], timeout_s: int) -> dict[str, Any]:
request = urllib.request.Request(
url,
data=json.dumps(payload).encode("utf-8"),
headers=headers,
method="POST",
)
with urllib.request.urlopen(request, timeout=timeout_s) as response:
return json.loads(response.read().decode("utf-8"))
def _llm_mode(base_url: str, provider: str) -> str:
if provider == "openai":
return "openai"
if provider == "ollama-native":
return "ollama-native"
return "openai" if base_url.rstrip("/").endswith("/v1") else "ollama-native"
def _optional_string(value: object) -> str | None:
text = str(value or "").strip()
return text or None
def _loads_lenient_json(content: str) -> Any:
try:
return json.loads(content)
except Exception:
pass
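# Strict parsing failed: fall back to extracting JSON from a fenced code block, then from the outermost braces or brackets.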
fenced = re.search(r"```(?:json)?\s*(\{.*\}|\[.*\])\s*```", content, flags=re.DOTALL)
if fenced:
return json.loads(fenced.group(1))
for opener, closer in (("{", "}"), ("[", "]")):
start = content.find(opener)
end = content.rfind(closer)
if start != -1 and end != -1 and end > start:
snippet = content[start : end + 1]
try:
return json.loads(snippet)
except Exception:
continue
raise ValueError("Model response did not contain parseable JSON")

View File

@@ -246,7 +246,13 @@ class BibliographyStore:
entry.fields.get("isbn"),
fulltext,
raw_bibtex,
-json.dumps({k: v for k, v in entry.fields.items() if k not in CORE_ENTRY_FIELDS and k not in RELATION_FIELDS}),
json.dumps(
{
k: v
for k, v in entry.fields.items()
if k not in CORE_ENTRY_FIELDS and k not in RELATION_FIELDS and k not in {"author", "editor"}
}
),
),
).fetchone()
entry_id = int(row["id"])
@@ -1142,6 +1148,8 @@
extra_fields = json.loads(row["extra_fields_json"])
for field_name in sorted(extra_fields):
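# author/editor are excluded from extra_fields_json on write (see above); skip them on read as well.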
if field_name in {"author", "editor"}:
continue
value = extra_fields[field_name]
if value:
fields[field_name] = str(value)

View File

@@ -6,6 +6,7 @@ from dataclasses import dataclass
from pathlib import Path
from .bibtex import BibEntry, parse_bibtex, render_bibtex
from .llm_verify import VerificationLlmClient, VerificationLlmConfig
from .resolve import MetadataResolver, Resolution
@@ -75,8 +76,16 @@ class VerificationResult:
class BibliographyVerifier:
-def __init__(self, resolver: MetadataResolver | None = None) -> None:
def __init__(
self,
resolver: MetadataResolver | None = None,
*,
llm_config: VerificationLlmConfig | None = None,
llm_client: VerificationLlmClient | None = None,
) -> None:
self.resolver = resolver or MetadataResolver()
self.llm_config = llm_config
self.llm_client = llm_client or VerificationLlmClient()
def verify_string(self, value: str, context: str = "", limit: int = 5) -> VerificationResult:
query_fields = _fields_from_string(value)
@@ -164,10 +173,18 @@
input_key=input_key,
)
query_fields = _clone_query_fields(query_fields)
search_query = query
if self.llm_config is not None:
hints = self.llm_client.analyze_query(self.llm_config, query, context)
if hints:
_apply_llm_hints(query_fields, hints)
search_query = _build_search_query(search_query, hints)
candidate_limit = max(1, limit)
candidates = self._collect_candidates(
title=str(query_fields.get("title", "")),
-query=query,
+query=search_query,
limit=candidate_limit,
)
scored = [
@@ -178,9 +195,21 @@
)
for entry, source_label in candidates
]
llm_ranks = _compute_llm_ranks(
self.llm_client.rerank_candidates(
self.llm_config,
query_fields,
context,
[match.entry for match in scored],
)
if self.llm_config is not None
else None,
scored,
)
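# LLM ordering only breaks ties among equally scored candidates; the resolver score still leads the sort key.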
scored.sort(
key=lambda item: (
-item.score,
llm_ranks.get(item.entry.citation_key, len(scored)),
item.entry.fields.get("year", ""), item.entry.fields.get("year", ""),
item.entry.citation_key, item.entry.citation_key,
) )
@@ -255,6 +284,31 @@ def _fields_from_string(value: str) -> dict[str, object]:
return {"title": title, "authors": authors, "year": year, "venue": ""}
def _clone_query_fields(query_fields: dict[str, object]) -> dict[str, object]:
cloned = dict(query_fields)
authors = cloned.get("authors", [])
cloned["authors"] = list(authors) if isinstance(authors, list) else []
return cloned
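# Hints only fill fields that are still empty; clues already parsed from the user's input are never overwritten.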
def _apply_llm_hints(query_fields: dict[str, object], hints: dict[str, object]) -> None:
if not str(query_fields.get("title", "")).strip() and hints.get("title"):
query_fields["title"] = str(hints["title"])
if not query_fields.get("authors") and hints.get("authors"):
query_fields["authors"] = [str(author) for author in hints["authors"] if str(author).strip()]
if not str(query_fields.get("year", "")).strip() and hints.get("year"):
query_fields["year"] = str(hints["year"])
if not str(query_fields.get("venue", "")).strip() and hints.get("venue"):
query_fields["venue"] = str(hints["venue"])
def _build_search_query(query: str, hints: dict[str, object]) -> str:
keywords = [str(value).strip() for value in hints.get("keywords", []) if str(value).strip()]
if not keywords:
return query
return " ".join(part for part in [query, " ".join(keywords[:5])] if part).strip()
def _score_candidate(query_fields: dict[str, object], context: str, entry: BibEntry) -> float:
score = 0.0
query_title = _tokenize(str(query_fields.get("title", "")))
@@ -371,3 +425,13 @@ def _placeholder_entry(query_fields: dict[str, object], query: str, input_key: s
def _slugify_key(value: str) -> str:
slug = re.sub(r"[^a-z0-9]+", "", value.lower())
return slug[:40] or "verification"
def _compute_llm_ranks(order: list[int] | None, matches: list[VerificationMatch]) -> dict[str, int]:
if not order:
return {}
ranks: dict[str, int] = {}
for rank, index in enumerate(order):
if 0 <= index < len(matches):
ranks[matches[index].entry.citation_key] = rank
return ranks

View File

@@ -247,6 +247,71 @@ def test_cli_verify_bib_outputs_json(tmp_path: Path):
assert payload[0]["entry"]["citation_key"] == "candidate2024"
def test_cli_verify_rejects_incomplete_llm_config(tmp_path: Path):
stderr_buffer = io.StringIO()
with redirect_stderr(stderr_buffer):
exit_code = main(
[
"--db",
str(tmp_path / "library.sqlite3"),
"verify",
"--string",
"Evans 1960",
"--llm",
]
)
assert exit_code == 1
assert "--llm requires --llm-base-url and --llm-model" in stderr_buffer.getvalue()
def test_cli_verify_builds_llm_config(tmp_path: Path):
from citegeist.bibtex import BibEntry
from citegeist.verify import VerificationResult
database = tmp_path / "library.sqlite3"
with patch("citegeist.cli.BibliographyVerifier") as mocked_verifier_cls:
mocked_verifier = mocked_verifier_cls.return_value
mocked_verifier.verify_string.return_value = VerificationResult(
query="Evans 1960",
context="marine mammals",
status="ambiguous",
confidence=0.6,
entry=BibEntry(entry_type="misc", citation_key="evans1960", fields={"title": "Evans 1960"}),
source_label="none",
alternates=[],
input_type="string",
input_key=None,
)
stdout_buffer = io.StringIO()
with redirect_stdout(stdout_buffer):
exit_code = main(
[
"--db",
str(database),
"verify",
"--string",
"Evans 1960",
"--llm",
"--llm-base-url",
"http://localhost:11434",
"--llm-model",
"qwen3",
"--llm-role",
"rerank",
"--format",
"json",
]
)
assert exit_code == 0
kwargs = mocked_verifier_cls.call_args.kwargs
assert kwargs["llm_config"].base_url == "http://localhost:11434"
assert kwargs["llm_config"].model == "qwen3"
assert kwargs["llm_config"].role == "rerank"
def test_cli_sync_jabref_ingests_resolves_and_exports(tmp_path: Path):
bib_path = tmp_path / "jabref-library.bib"
bib_path.write_text(

View File

@@ -0,0 +1,78 @@
from __future__ import annotations
import os
import pytest
from citegeist.bibtex import BibEntry
from citegeist.llm_verify import VerificationLlmClient, VerificationLlmConfig
pytestmark = pytest.mark.live
def _live_llm_config() -> VerificationLlmConfig:
return VerificationLlmConfig(
base_url=os.environ.get("CITEGEIST_VERIFY_LLM_BASE_URL", "http://127.0.0.1:8800/v1"),
model=os.environ.get("CITEGEIST_VERIFY_LLM_MODEL", "general_assistant"),
api_key=os.environ.get("CITEGEIST_VERIFY_LLM_API_KEY", "change-me-client-key"),
provider=os.environ.get("CITEGEIST_VERIFY_LLM_PROVIDER", "auto"),
role="both",
)
def test_live_llm_query_analysis_via_geniehive():
client = VerificationLlmClient()
result = client.analyze_query(
_live_llm_config(),
"Evans 1960",
"marine mammals; bottlenose dolphin echolocation",
)
if result is None:
pytest.skip("local GenieHive route did not return parseable JSON for query analysis")
assert isinstance(result["authors"], list)
assert isinstance(result["keywords"], list)
def test_live_llm_candidate_rerank_via_geniehive():
client = VerificationLlmClient()
candidates = [
BibEntry(
entry_type="article",
citation_key="candidate_a",
fields={
"author": "Doe, Jane",
"title": "General Marine Biology Survey",
"year": "1960",
"journal": "Marine Science",
},
),
BibEntry(
entry_type="article",
citation_key="candidate_b",
fields={
"author": "Evans, William",
"title": "Echolocation by marine dolphins",
"year": "1960",
"journal": "Journal of the Acoustical Society",
},
),
]
result = client.rerank_candidates(
_live_llm_config(),
{
"title": "",
"authors": ["Evans"],
"year": "1960",
"venue": "",
},
"bottlenose dolphin echolocation",
candidates,
)
if result is None:
pytest.skip("local GenieHive route did not return parseable JSON for candidate reranking")
assert result
assert all(isinstance(index, int) for index in result)

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
from citegeist.bibtex import BibEntry
from citegeist.llm_verify import VerificationLlmConfig, _loads_lenient_json
from citegeist.resolve import Resolution
from citegeist.verify import BibliographyVerifier
@@ -120,3 +121,102 @@ def test_verification_result_to_bib_entry_contains_audit_fields():
assert bib_entry.fields["x_status"] == "not_found"
assert bib_entry.fields["x_query"] == "Missing Work"
def test_verifier_llm_expand_only_fills_missing_fields():
class _FakeLlmClient:
def analyze_query(self, config, query, context):
return {
"title": "Expanded Title",
"authors": ["Smith"],
"year": "2024",
"venue": "Journal of Tests",
"keywords": ["echolocation", "marine"],
}
def rerank_candidates(self, config, query_fields, context, candidates):
return None
verifier = BibliographyVerifier(
llm_config=VerificationLlmConfig(base_url="http://localhost:11434", model="qwen", role="expand"),
llm_client=_FakeLlmClient(),
)
seen_titles: list[str] = []
verifier.resolver.search_crossref = lambda title, limit=5: (seen_titles.append(title) or []) # type: ignore[method-assign]
verifier.resolver.search_openalex = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.resolver.search_datacite = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.resolver.search_pubmed = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.verify_string("Evans 1960", context="bottlenose dolphin echolocation")
assert seen_titles == ["Expanded Title"]
def test_verifier_llm_rerank_only_breaks_score_ties():
class _FakeLlmClient:
def analyze_query(self, config, query, context):
return None
def rerank_candidates(self, config, query_fields, context, candidates):
return [1, 0]
verifier = BibliographyVerifier(
llm_config=VerificationLlmConfig(base_url="http://localhost:11434", model="qwen", role="rerank"),
llm_client=_FakeLlmClient(),
)
verifier.resolver.search_crossref = lambda title, limit=5: [ # type: ignore[method-assign]
BibEntry(
entry_type="article",
citation_key="alpha",
fields={"author": "Smith, Jane", "title": "Shared Match Primary", "year": "2024"},
),
BibEntry(
entry_type="article",
citation_key="beta",
fields={"author": "Smith, Jane", "title": "Shared Match Secondary", "year": "2024"},
),
]
verifier.resolver.search_openalex = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.resolver.search_datacite = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.resolver.search_pubmed = lambda title, limit=5: [] # type: ignore[method-assign]
result = verifier.verify_string('"Shared Match" Smith 2024')
assert result.entry.citation_key == "beta"
assert result.alternates[0].entry.citation_key == "alpha"
def test_verifier_llm_cannot_create_exact_without_verified_doi():
class _FakeLlmClient:
def analyze_query(self, config, query, context):
return {"title": "Resolved Work", "authors": ["Smith"], "year": "2024", "venue": None, "keywords": []}
def rerank_candidates(self, config, query_fields, context, candidates):
return None
verifier = BibliographyVerifier(
llm_config=VerificationLlmConfig(base_url="http://localhost:11434", model="qwen", role="expand"),
llm_client=_FakeLlmClient(),
)
verifier.resolver.search_crossref = lambda title, limit=5: [ # type: ignore[method-assign]
BibEntry(
entry_type="article",
citation_key="candidate",
fields={"author": "Smith, Jane", "title": "Resolved Work", "year": "2024"},
)
]
verifier.resolver.search_openalex = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.resolver.search_datacite = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.resolver.search_pubmed = lambda title, limit=5: [] # type: ignore[method-assign]
result = verifier.verify_string("Smith 2024", context="citation graphs")
assert result.status != "exact"
def test_llm_json_loader_accepts_fenced_payload():
payload = '```json\n{"title":"Resolved Work","authors":["Smith"],"keywords":["graphs"]}\n```'
result = _loads_lenient_json(payload)
assert result["title"] == "Resolved Work"