LLM verify + fixes + tests

welsberr 2026-04-21 03:15:33 -04:00
parent 65fde034e1
commit 4894341ba8
13 changed files with 751 additions and 10 deletions

.gitignore (vendored, +2)
View File

@@ -6,3 +6,5 @@ __pycache__/
*.egg-info/
library.sqlite3
ops/
.codex
SESSION_*

View File

@@ -1,7 +1,7 @@
PYTHONPATH_SRC=PYTHONPATH=src
VENV_PYTHON=.venv/bin/python
-.PHONY: test test-live live-smoke validate-talkorigins
+.PHONY: test test-live live-smoke live-verify-llm-smoke validate-talkorigins
test:
$(PYTHONPATH_SRC) $(VENV_PYTHON) -m pytest -q
@@ -12,5 +12,8 @@ test-live:
live-smoke:
CITEGEIST_SOURCE_CACHE=.cache/citegeist $(PYTHONPATH_SRC) $(VENV_PYTHON) scripts/live_smoke.py
live-verify-llm-smoke:
$(PYTHONPATH_SRC) $(VENV_PYTHON) scripts/live_verify_llm_smoke.py
validate-talkorigins:
$(PYTHONPATH_SRC) $(VENV_PYTHON) -m citegeist validate-talkorigins talkorigins-out/talkorigins_manifest.json

View File

@@ -172,6 +172,7 @@ PYTHONPATH=src .venv/bin/python -m citegeist compare-extract references.txt --ba
PYTHONPATH=src .venv/bin/python -m citegeist compare-extract references.txt --backend heuristic --backend grobid --summary --output compare-summary.json
PYTHONPATH=src .venv/bin/python -m citegeist compare-extract references.txt --backend heuristic --backend grobid --summary --max-rows-with-differences 0 --output compare-check.json
PYTHONPATH=src .venv/bin/python -m citegeist verify --string '"Graph-first bibliography augmentation" Smith 2024' --context "citation graphs" --format json
PYTHONPATH=src .venv/bin/python -m citegeist verify --string 'Evans 1960' --context "bottlenose dolphin echolocation" --llm --llm-base-url http://localhost:11434 --llm-model qwen3 --llm-role both --format json
PYTHONPATH=src .venv/bin/python -m citegeist verify --bib draft.bib --output verified.bib
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve smith2024graphs
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --preview --limit 25
@@ -257,6 +258,58 @@ The built-in extraction backends are:
The backend interface exists so future GROBID- or other parser adapters can be registered without replacing the local parser or changing the CLI contract.
## LLM-Assisted Verify
`citegeist verify` can optionally use a local LLM for two bounded tasks:
- `expand`: infer missing bibliographic clues from free text and context
- `rerank`: advisory reranking of already fetched resolver candidates
Example:
```bash
PYTHONPATH=src .venv/bin/python -m citegeist verify \
--string 'Evans 1960' \
--context "bottlenose dolphin echolocation" \
--llm \
--llm-base-url http://localhost:11434 \
--llm-model qwen3 \
--llm-role both \
--format json
```
Supported local endpoint styles:
- OpenAI-compatible APIs such as `http://localhost:11434/v1`
- Ollama native chat APIs such as `http://localhost:11434`
For the current local GenieHive setup, this also works directly:
```bash
PYTHONPATH=src .venv/bin/python -m citegeist verify \
--string 'Evans 1960' \
--context "bottlenose dolphin echolocation" \
--llm \
--llm-base-url http://127.0.0.1:8800/v1 \
--llm-api-key change-me-client-key \
--llm-model general_assistant \
--llm-role both \
--format json
```
There is also a local smoke script that exercises only the LLM helper path:
```bash
make live-verify-llm-smoke
```
The script targets the local endpoint by default and honors the `CITEGEIST_VERIFY_LLM_BASE_URL`, `CITEGEIST_VERIFY_LLM_MODEL`, `CITEGEIST_VERIFY_LLM_API_KEY`, and `CITEGEIST_VERIFY_LLM_PROVIDER` environment variables when set.
Safety constraints:
- the LLM is never trusted to invent DOIs or other identifiers
- the LLM only fills missing query clues or suggests a candidate ordering
- `exact` status still requires verified resolver evidence, not LLM output
- if the LLM call fails or returns unusable JSON, `verify` falls back to the normal resolver-only path
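The same helpers can also be driven from Python. The sketch below is illustrative only: it assumes `src/` is on `PYTHONPATH` and reuses the local endpoint and model from the example above; substitute your own values.
```python
from citegeist.llm_verify import VerificationLlmConfig
from citegeist.verify import BibliographyVerifier

# Illustrative endpoint and model; any OpenAI-compatible or Ollama server works.
llm_config = VerificationLlmConfig(
    base_url="http://localhost:11434",
    model="qwen3",
    role="both",  # "expand", "rerank", or "both"
)
verifier = BibliographyVerifier(llm_config=llm_config)

# If the endpoint is unreachable or returns unusable JSON, the verifier
# silently falls back to the resolver-only path, so this still returns a result.
result = verifier.verify_string("Evans 1960", context="bottlenose dolphin echolocation")
print(result.status, result.confidence, result.entry.citation_key)
```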
To compare backend output on the same plaintext references, use `compare-extract`. It aligns entries by ordinal/reference block and emits JSON with per-backend payloads plus a `differing_fields` summary for each row. Add `--summary` when you want a compact evaluation artifact with disagreement counts by field and backend presence counts instead of the full row-by-row payload. Add `--max-rows-with-differences` and/or `--max-field-difference-count` when you want CI-style failure thresholds; the command will emit the summary JSON and return a nonzero exit code if the limits are exceeded.
For regression-oriented parser work, keep a small curated plaintext fixture set and run `compare-extract` against multiple backends before changing heuristics. That makes backend disagreement explicit and gives you a stable review artifact for parser changes.

View File

@@ -0,0 +1,97 @@
from __future__ import annotations
import argparse
import json
import os
from citegeist.bibtex import BibEntry
from citegeist.llm_verify import VerificationLlmClient, VerificationLlmConfig
def build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description="Run live LLM verify smoke checks against a local OpenAI-compatible endpoint")
parser.add_argument(
"--base-url",
default=os.environ.get("CITEGEIST_VERIFY_LLM_BASE_URL", "http://127.0.0.1:8800/v1"),
help="OpenAI-compatible or Ollama base URL",
)
parser.add_argument(
"--model",
default=os.environ.get("CITEGEIST_VERIFY_LLM_MODEL", "general_assistant"),
help="Model or route ID exposed by the local endpoint",
)
parser.add_argument(
"--api-key",
default=os.environ.get("CITEGEIST_VERIFY_LLM_API_KEY", "change-me-client-key"),
help="Optional API key for the local endpoint",
)
parser.add_argument(
"--provider",
default=os.environ.get("CITEGEIST_VERIFY_LLM_PROVIDER", "auto"),
choices=["auto", "openai", "ollama-native"],
help="Endpoint protocol style",
)
return parser
def main() -> int:
args = build_parser().parse_args()
client = VerificationLlmClient()
config = VerificationLlmConfig(
base_url=args.base_url,
model=args.model,
api_key=args.api_key,
provider=args.provider,
role="both",
)
analysis = client.analyze_query(
config,
"Evans 1960",
"marine mammals; bottlenose dolphin echolocation",
)
rerank = client.rerank_candidates(
config,
{"title": "", "authors": ["Evans"], "year": "1960", "venue": ""},
"bottlenose dolphin echolocation",
[
BibEntry(
entry_type="article",
citation_key="candidate_a",
fields={
"author": "Doe, Jane",
"title": "General Marine Biology Survey",
"year": "1960",
"journal": "Marine Science",
},
),
BibEntry(
entry_type="article",
citation_key="candidate_b",
fields={
"author": "Evans, William",
"title": "Echolocation by marine dolphins",
"year": "1960",
"journal": "Journal of the Acoustical Society",
},
),
],
)
print(
json.dumps(
{
"base_url": args.base_url,
"model": args.model,
"analysis": analysis,
"rerank": rerank,
},
indent=2,
sort_keys=True,
)
)
return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -13,6 +13,7 @@ from .extract import (
summarize_extraction_comparison,
)
from .harvest import OaiMetadataFormat, OaiPmhHarvester, OaiSet
from .llm_verify import VerificationLlmClient, VerificationLlmConfig
from .resolve import MetadataResolver, merge_entries, merge_entries_with_conflicts
from .sources import SourceClient
from .storage import BibliographyStore
@@ -34,6 +35,8 @@ __all__ = [
"OaiMetadataFormat",
"OaiSet",
"SourceClient",
"VerificationLlmClient",
"VerificationLlmConfig",
"VerificationMatch",
"VerificationResult",
"available_extraction_backends",

View File

@@ -20,6 +20,7 @@ from .extract import (
summarize_extraction_comparison,
)
from .harvest import OaiPmhHarvester
from .llm_verify import VerificationLlmConfig
from .resolve import MetadataResolver, merge_entries_with_conflicts
from .storage import BibliographyStore
from .verify import BibliographyVerifier, render_verification_results
@@ -145,6 +146,22 @@ def build_parser() -> argparse.ArgumentParser:
verify_group.add_argument("--bib", help="Path to a BibTeX file whose entries should be verified")
verify_parser.add_argument("--context", default="", help="Optional topic context used for scoring")
verify_parser.add_argument("--limit", type=int, default=5, help="Maximum candidates to inspect per input")
verify_parser.add_argument("--llm", action="store_true", help="Enable optional local LLM assistance for verify")
verify_parser.add_argument("--llm-base-url", help="OpenAI-compatible or Ollama base URL for local LLM assistance")
verify_parser.add_argument("--llm-model", help="Model ID for local LLM assistance")
verify_parser.add_argument("--llm-api-key", default="", help="Optional API key for the LLM endpoint")
verify_parser.add_argument(
"--llm-provider",
choices=["auto", "openai", "ollama-native"],
default="auto",
help="LLM API style; auto treats `/v1` endpoints as OpenAI-compatible",
)
verify_parser.add_argument(
"--llm-role",
choices=["expand", "rerank", "both"],
default="both",
help="Use the local LLM for query-clue extraction, candidate reranking, or both",
)
verify_parser.add_argument(
"--format",
choices=["bibtex", "json"],
@@ -715,7 +732,21 @@ def main(argv: list[str] | None = None) -> int:
args.output,
)
if args.command == "verify":
-return _run_verify(args.string, args.list_input, args.bib, args.context, args.limit, args.format, args.output)
return _run_verify(
args.string,
args.list_input,
args.bib,
args.context,
args.limit,
args.format,
args.output,
llm_enabled=args.llm,
llm_base_url=args.llm_base_url,
llm_model=args.llm_model,
llm_api_key=args.llm_api_key,
llm_provider=args.llm_provider,
llm_role=args.llm_role,
)
if args.command == "resolve": if args.command == "resolve":
return _run_resolve(store, args.citation_keys) return _run_resolve(store, args.citation_keys)
if args.command == "resolve-stubs": if args.command == "resolve-stubs":
@ -750,8 +781,6 @@ def main(argv: list[str] | None = None) -> int:
args.rounds, args.rounds,
args.recent_years, args.recent_years,
args.target_recent_entries, args.target_recent_entries,
args.max_expanded_entries,
args.max_expand_seconds,
) )
if args.command == "set-topic-phrase": if args.command == "set-topic-phrase":
return _run_set_topic_phrase(store, args.topic_slug, args.phrase, args.clear) return _run_set_topic_phrase(store, args.topic_slug, args.phrase, args.clear)
@@ -785,6 +814,8 @@ def main(argv: list[str] | None = None) -> int:
args.expansion_rounds,
args.recent_years,
args.target_recent_entries,
args.max_expanded_entries,
args.max_expand_seconds,
)
if args.command == "bootstrap-batch":
return _run_bootstrap_batch(store, Path(args.input))
@@ -1121,8 +1152,27 @@
limit: int,
output_format: str,
output: str | None,
*,
llm_enabled: bool = False,
llm_base_url: str | None = None,
llm_model: str | None = None,
llm_api_key: str = "",
llm_provider: str = "auto",
llm_role: str = "both",
) -> int:
-verifier = BibliographyVerifier()
llm_config = None
if llm_enabled:
if not llm_base_url or not llm_model:
print("--llm requires --llm-base-url and --llm-model", file=sys.stderr)
return 1
llm_config = VerificationLlmConfig(
base_url=llm_base_url,
model=llm_model,
api_key=llm_api_key,
provider=llm_provider,
role=llm_role,
)
verifier = BibliographyVerifier(llm_config=llm_config)
if string_input is not None:
results = [verifier.verify_string(string_input, context=context, limit=limit)]
elif list_input is not None:

View File

@@ -196,7 +196,7 @@ class OpenAlexExpander:
)
results.append(
ExpansionResult(
-source_citation_key=citation_key,
+source_citation_key=source_key,
discovered_citation_key=existing_key or discovered.citation_key,
created_entry=created,
relation_type=relation_type,
@@ -335,7 +335,7 @@ class TopicExpander:
assigned_to_topic=assigned,
)
)
-if target_recent_entries is not None and len(recent_topic_hits) >= target_recent_entries:
+if target_recent_entries is not None and len(recent_hits) >= target_recent_entries:
self.last_run_meta.update({
"stop_reason": "target_recent_entries",
"recent_hits": len(recent_hits),

src/citegeist/llm_verify.py (new file, +218)
View File

@@ -0,0 +1,218 @@
from __future__ import annotations
import json
import re
import urllib.request
from dataclasses import dataclass
from typing import Any, Callable
from .bibtex import BibEntry
DEFAULT_SYSTEM_PROMPT = (
"You are a meticulous bibliography verification assistant. "
"You never invent DOIs, page ranges, venues, or identifiers. "
"You may only suggest missing clues from the provided input and context. "
"When uncertain, return null or an empty list. "
"Always respond with strict JSON matching the requested shape."
)
@dataclass(slots=True)
class VerificationLlmConfig:
base_url: str
model: str
api_key: str = ""
provider: str = "auto"
role: str = "both"
def enabled_for(self, capability: str) -> bool:
return bool(self.base_url and self.model) and self.role in {capability, "both"}
class VerificationLlmClient:
def __init__(
self,
*,
timeout_s: int = 60,
post_json: Callable[[str, dict[str, Any], dict[str, str], int], dict[str, Any]] | None = None,
) -> None:
self.timeout_s = timeout_s
self._post_json = post_json or _default_post_json
def analyze_query(
self,
config: VerificationLlmConfig,
free_text: str,
context: str,
) -> dict[str, Any] | None:
if not config.enabled_for("expand"):
return None
payload = {
"task": "extract_bibliographic_clues",
"input": {"free_text": free_text, "context": context},
"rules": [
"Never invent a DOI or identifier.",
"Only fill clues that plausibly follow from the input and context.",
"Return null for unknown scalar fields.",
],
"schema": {
"type": "object",
"properties": {
"title": {"type": ["string", "null"]},
"authors": {"type": "array", "items": {"type": "string"}},
"year": {"type": ["string", "null"]},
"venue": {"type": ["string", "null"]},
"keywords": {"type": "array", "items": {"type": "string"}},
},
"required": ["authors", "keywords"],
},
}
result = self._chat_json(config, payload)
if not isinstance(result, dict):
return None
authors = [str(value).strip() for value in result.get("authors", []) if str(value).strip()]
keywords = [str(value).strip() for value in result.get("keywords", []) if str(value).strip()]
return {
"title": _optional_string(result.get("title")),
"authors": authors,
"year": _optional_string(result.get("year")),
"venue": _optional_string(result.get("venue")),
"keywords": keywords,
}
def rerank_candidates(
self,
config: VerificationLlmConfig,
query_fields: dict[str, object],
context: str,
candidates: list[BibEntry],
) -> list[int] | None:
if not config.enabled_for("rerank") or not candidates:
return None
payload = {
"task": "rerank_candidates",
"instruction": (
"Return a JSON array of candidate indices sorted best to worst. "
"Do not invent metadata. Prefer candidates that better match the given clues."
),
"input": {
"query_fields": query_fields,
"context": context,
"candidates": [
{
"title": entry.fields.get("title", ""),
"authors": entry.fields.get("author", "").split(" and ") if entry.fields.get("author") else [],
"year": entry.fields.get("year", ""),
"venue": entry.fields.get("journal", "") or entry.fields.get("booktitle", ""),
"doi": entry.fields.get("doi", ""),
}
for entry in candidates[:8]
],
},
}
result = self._chat_json(config, payload)
if not isinstance(result, list):
return None
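# Keep only in-range integer indices; an empty or malformed reply means the reranking advice is ignored.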
indices = [value for value in result if isinstance(value, int) and 0 <= value < len(candidates)]
return indices or None
def _chat_json(self, config: VerificationLlmConfig, payload: dict[str, Any]) -> Any:
try:
if _llm_mode(config.base_url, config.provider) == "openai":
return self._chat_openai(config, payload)
return self._chat_ollama_native(config, payload)
except Exception:
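# Any endpoint, protocol, or parsing failure disables LLM assistance for this call; verify then proceeds resolver-only.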
return None
def _chat_openai(self, config: VerificationLlmConfig, payload: dict[str, Any]) -> Any:
headers = {"Content-Type": "application/json"}
if config.api_key:
headers["Authorization"] = f"Bearer {config.api_key}"
body = {
"model": config.model,
"temperature": 0,
"messages": [
{"role": "system", "content": DEFAULT_SYSTEM_PROMPT},
{"role": "user", "content": json.dumps(payload)},
],
}
data = self._post_json(
config.base_url.rstrip("/") + "/chat/completions",
body,
headers,
self.timeout_s,
)
content = data["choices"][0]["message"]["content"]
return _loads_lenient_json(content)
def _chat_ollama_native(self, config: VerificationLlmConfig, payload: dict[str, Any]) -> Any:
base_url = config.base_url.rstrip("/")
if base_url.endswith("/v1"):
base_url = base_url[:-3]
body = {
"model": config.model,
"messages": [
{"role": "system", "content": DEFAULT_SYSTEM_PROMPT},
{"role": "user", "content": json.dumps(payload)},
],
"options": {"temperature": 0},
"stream": False,
}
headers = {"Content-Type": "application/json"}
if config.api_key:
headers["Authorization"] = f"Bearer {config.api_key}"
data = self._post_json(
base_url + "/api/chat",
body,
headers,
self.timeout_s,
)
content = data["message"]["content"]
return _loads_lenient_json(content)
def _default_post_json(url: str, payload: dict[str, Any], headers: dict[str, str], timeout_s: int) -> dict[str, Any]:
request = urllib.request.Request(
url,
data=json.dumps(payload).encode("utf-8"),
headers=headers,
method="POST",
)
with urllib.request.urlopen(request, timeout=timeout_s) as response:
return json.loads(response.read().decode("utf-8"))
def _llm_mode(base_url: str, provider: str) -> str:
if provider == "openai":
return "openai"
if provider == "ollama-native":
return "ollama-native"
return "openai" if base_url.rstrip("/").endswith("/v1") else "ollama-native"
def _optional_string(value: object) -> str | None:
text = str(value or "").strip()
return text or None
def _loads_lenient_json(content: str) -> Any:
try:
return json.loads(content)
except Exception:
pass
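# Strict parsing failed: fall back to extracting JSON from a fenced code block, then from the outermost braces or brackets.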
fenced = re.search(r"```(?:json)?\s*(\{.*\}|\[.*\])\s*```", content, flags=re.DOTALL)
if fenced:
return json.loads(fenced.group(1))
for opener, closer in (("{", "}"), ("[", "]")):
start = content.find(opener)
end = content.rfind(closer)
if start != -1 and end != -1 and end > start:
snippet = content[start : end + 1]
try:
return json.loads(snippet)
except Exception:
continue
raise ValueError("Model response did not contain parseable JSON")

View File

@@ -246,7 +246,13 @@ class BibliographyStore:
entry.fields.get("isbn"),
fulltext,
raw_bibtex,
-json.dumps({k: v for k, v in entry.fields.items() if k not in CORE_ENTRY_FIELDS and k not in RELATION_FIELDS}),
json.dumps(
{
k: v
for k, v in entry.fields.items()
if k not in CORE_ENTRY_FIELDS and k not in RELATION_FIELDS and k not in {"author", "editor"}
}
),
),
).fetchone()
entry_id = int(row["id"])
@@ -1142,6 +1148,8 @@
extra_fields = json.loads(row["extra_fields_json"])
for field_name in sorted(extra_fields):
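# author/editor are excluded from extra_fields_json on write (see above); skip them on read as well.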
if field_name in {"author", "editor"}:
continue
value = extra_fields[field_name]
if value:
fields[field_name] = str(value)

View File

@@ -6,6 +6,7 @@ from dataclasses import dataclass
from pathlib import Path
from .bibtex import BibEntry, parse_bibtex, render_bibtex
from .llm_verify import VerificationLlmClient, VerificationLlmConfig
from .resolve import MetadataResolver, Resolution
@@ -75,8 +76,16 @@ class VerificationResult:
class BibliographyVerifier:
-def __init__(self, resolver: MetadataResolver | None = None) -> None:
def __init__(
self,
resolver: MetadataResolver | None = None,
*,
llm_config: VerificationLlmConfig | None = None,
llm_client: VerificationLlmClient | None = None,
) -> None:
self.resolver = resolver or MetadataResolver()
self.llm_config = llm_config
self.llm_client = llm_client or VerificationLlmClient()
def verify_string(self, value: str, context: str = "", limit: int = 5) -> VerificationResult:
query_fields = _fields_from_string(value)
@@ -164,10 +173,18 @@
input_key=input_key,
)
query_fields = _clone_query_fields(query_fields)
search_query = query
if self.llm_config is not None:
hints = self.llm_client.analyze_query(self.llm_config, query, context)
if hints:
_apply_llm_hints(query_fields, hints)
search_query = _build_search_query(search_query, hints)
candidate_limit = max(1, limit)
candidates = self._collect_candidates(
title=str(query_fields.get("title", "")),
-query=query,
+query=search_query,
limit=candidate_limit,
)
scored = [
@@ -178,9 +195,21 @@
)
for entry, source_label in candidates
]
llm_ranks = _compute_llm_ranks(
self.llm_client.rerank_candidates(
self.llm_config,
query_fields,
context,
[match.entry for match in scored],
)
if self.llm_config is not None
else None,
scored,
)
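# LLM ordering only breaks ties among equally scored candidates; the resolver score still leads the sort key.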
scored.sort(
key=lambda item: (
-item.score,
llm_ranks.get(item.entry.citation_key, len(scored)),
item.entry.fields.get("year", ""), item.entry.fields.get("year", ""),
item.entry.citation_key, item.entry.citation_key,
) )
@@ -255,6 +284,31 @@ def _fields_from_string(value: str) -> dict[str, object]:
return {"title": title, "authors": authors, "year": year, "venue": ""}
def _clone_query_fields(query_fields: dict[str, object]) -> dict[str, object]:
cloned = dict(query_fields)
authors = cloned.get("authors", [])
cloned["authors"] = list(authors) if isinstance(authors, list) else []
return cloned
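# Hints only fill fields that are still empty; clues already parsed from the user's input are never overwritten.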
def _apply_llm_hints(query_fields: dict[str, object], hints: dict[str, object]) -> None:
if not str(query_fields.get("title", "")).strip() and hints.get("title"):
query_fields["title"] = str(hints["title"])
if not query_fields.get("authors") and hints.get("authors"):
query_fields["authors"] = [str(author) for author in hints["authors"] if str(author).strip()]
if not str(query_fields.get("year", "")).strip() and hints.get("year"):
query_fields["year"] = str(hints["year"])
if not str(query_fields.get("venue", "")).strip() and hints.get("venue"):
query_fields["venue"] = str(hints["venue"])
def _build_search_query(query: str, hints: dict[str, object]) -> str:
keywords = [str(value).strip() for value in hints.get("keywords", []) if str(value).strip()]
if not keywords:
return query
return " ".join(part for part in [query, " ".join(keywords[:5])] if part).strip()
def _score_candidate(query_fields: dict[str, object], context: str, entry: BibEntry) -> float:
score = 0.0
query_title = _tokenize(str(query_fields.get("title", "")))
@@ -371,3 +425,13 @@ def _placeholder_entry(query_fields: dict[str, object], query: str, input_key: s
def _slugify_key(value: str) -> str:
slug = re.sub(r"[^a-z0-9]+", "", value.lower())
return slug[:40] or "verification"
def _compute_llm_ranks(order: list[int] | None, matches: list[VerificationMatch]) -> dict[str, int]:
if not order:
return {}
ranks: dict[str, int] = {}
for rank, index in enumerate(order):
if 0 <= index < len(matches):
ranks[matches[index].entry.citation_key] = rank
return ranks

View File

@@ -247,6 +247,71 @@ def test_cli_verify_bib_outputs_json(tmp_path: Path):
assert payload[0]["entry"]["citation_key"] == "candidate2024"
def test_cli_verify_rejects_incomplete_llm_config(tmp_path: Path):
stderr_buffer = io.StringIO()
with redirect_stderr(stderr_buffer):
exit_code = main(
[
"--db",
str(tmp_path / "library.sqlite3"),
"verify",
"--string",
"Evans 1960",
"--llm",
]
)
assert exit_code == 1
assert "--llm requires --llm-base-url and --llm-model" in stderr_buffer.getvalue()
def test_cli_verify_builds_llm_config(tmp_path: Path):
from citegeist.bibtex import BibEntry
from citegeist.verify import VerificationResult
database = tmp_path / "library.sqlite3"
with patch("citegeist.cli.BibliographyVerifier") as mocked_verifier_cls:
mocked_verifier = mocked_verifier_cls.return_value
mocked_verifier.verify_string.return_value = VerificationResult(
query="Evans 1960",
context="marine mammals",
status="ambiguous",
confidence=0.6,
entry=BibEntry(entry_type="misc", citation_key="evans1960", fields={"title": "Evans 1960"}),
source_label="none",
alternates=[],
input_type="string",
input_key=None,
)
stdout_buffer = io.StringIO()
with redirect_stdout(stdout_buffer):
exit_code = main(
[
"--db",
str(database),
"verify",
"--string",
"Evans 1960",
"--llm",
"--llm-base-url",
"http://localhost:11434",
"--llm-model",
"qwen3",
"--llm-role",
"rerank",
"--format",
"json",
]
)
assert exit_code == 0
kwargs = mocked_verifier_cls.call_args.kwargs
assert kwargs["llm_config"].base_url == "http://localhost:11434"
assert kwargs["llm_config"].model == "qwen3"
assert kwargs["llm_config"].role == "rerank"
def test_cli_sync_jabref_ingests_resolves_and_exports(tmp_path: Path):
bib_path = tmp_path / "jabref-library.bib"
bib_path.write_text(

View File

@@ -0,0 +1,78 @@
from __future__ import annotations
import os
import pytest
from citegeist.bibtex import BibEntry
from citegeist.llm_verify import VerificationLlmClient, VerificationLlmConfig
pytestmark = pytest.mark.live
def _live_llm_config() -> VerificationLlmConfig:
return VerificationLlmConfig(
base_url=os.environ.get("CITEGEIST_VERIFY_LLM_BASE_URL", "http://127.0.0.1:8800/v1"),
model=os.environ.get("CITEGEIST_VERIFY_LLM_MODEL", "general_assistant"),
api_key=os.environ.get("CITEGEIST_VERIFY_LLM_API_KEY", "change-me-client-key"),
provider=os.environ.get("CITEGEIST_VERIFY_LLM_PROVIDER", "auto"),
role="both",
)
def test_live_llm_query_analysis_via_geniehive():
client = VerificationLlmClient()
result = client.analyze_query(
_live_llm_config(),
"Evans 1960",
"marine mammals; bottlenose dolphin echolocation",
)
if result is None:
pytest.skip("local GenieHive route did not return parseable JSON for query analysis")
assert isinstance(result["authors"], list)
assert isinstance(result["keywords"], list)
def test_live_llm_candidate_rerank_via_geniehive():
client = VerificationLlmClient()
candidates = [
BibEntry(
entry_type="article",
citation_key="candidate_a",
fields={
"author": "Doe, Jane",
"title": "General Marine Biology Survey",
"year": "1960",
"journal": "Marine Science",
},
),
BibEntry(
entry_type="article",
citation_key="candidate_b",
fields={
"author": "Evans, William",
"title": "Echolocation by marine dolphins",
"year": "1960",
"journal": "Journal of the Acoustical Society",
},
),
]
result = client.rerank_candidates(
_live_llm_config(),
{
"title": "",
"authors": ["Evans"],
"year": "1960",
"venue": "",
},
"bottlenose dolphin echolocation",
candidates,
)
if result is None:
pytest.skip("local GenieHive route did not return parseable JSON for candidate reranking")
assert result
assert all(isinstance(index, int) for index in result)

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
from citegeist.bibtex import BibEntry
from citegeist.llm_verify import VerificationLlmConfig, _loads_lenient_json
from citegeist.resolve import Resolution
from citegeist.verify import BibliographyVerifier
@@ -120,3 +121,102 @@ def test_verification_result_to_bib_entry_contains_audit_fields():
assert bib_entry.fields["x_status"] == "not_found"
assert bib_entry.fields["x_query"] == "Missing Work"
def test_verifier_llm_expand_only_fills_missing_fields():
class _FakeLlmClient:
def analyze_query(self, config, query, context):
return {
"title": "Expanded Title",
"authors": ["Smith"],
"year": "2024",
"venue": "Journal of Tests",
"keywords": ["echolocation", "marine"],
}
def rerank_candidates(self, config, query_fields, context, candidates):
return None
verifier = BibliographyVerifier(
llm_config=VerificationLlmConfig(base_url="http://localhost:11434", model="qwen", role="expand"),
llm_client=_FakeLlmClient(),
)
seen_titles: list[str] = []
verifier.resolver.search_crossref = lambda title, limit=5: (seen_titles.append(title) or []) # type: ignore[method-assign]
verifier.resolver.search_openalex = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.resolver.search_datacite = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.resolver.search_pubmed = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.verify_string("Evans 1960", context="bottlenose dolphin echolocation")
assert seen_titles == ["Expanded Title"]
def test_verifier_llm_rerank_only_breaks_score_ties():
class _FakeLlmClient:
def analyze_query(self, config, query, context):
return None
def rerank_candidates(self, config, query_fields, context, candidates):
return [1, 0]
verifier = BibliographyVerifier(
llm_config=VerificationLlmConfig(base_url="http://localhost:11434", model="qwen", role="rerank"),
llm_client=_FakeLlmClient(),
)
verifier.resolver.search_crossref = lambda title, limit=5: [ # type: ignore[method-assign]
BibEntry(
entry_type="article",
citation_key="alpha",
fields={"author": "Smith, Jane", "title": "Shared Match Primary", "year": "2024"},
),
BibEntry(
entry_type="article",
citation_key="beta",
fields={"author": "Smith, Jane", "title": "Shared Match Secondary", "year": "2024"},
),
]
verifier.resolver.search_openalex = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.resolver.search_datacite = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.resolver.search_pubmed = lambda title, limit=5: [] # type: ignore[method-assign]
result = verifier.verify_string('"Shared Match" Smith 2024')
assert result.entry.citation_key == "beta"
assert result.alternates[0].entry.citation_key == "alpha"
def test_verifier_llm_cannot_create_exact_without_verified_doi():
class _FakeLlmClient:
def analyze_query(self, config, query, context):
return {"title": "Resolved Work", "authors": ["Smith"], "year": "2024", "venue": None, "keywords": []}
def rerank_candidates(self, config, query_fields, context, candidates):
return None
verifier = BibliographyVerifier(
llm_config=VerificationLlmConfig(base_url="http://localhost:11434", model="qwen", role="expand"),
llm_client=_FakeLlmClient(),
)
verifier.resolver.search_crossref = lambda title, limit=5: [ # type: ignore[method-assign]
BibEntry(
entry_type="article",
citation_key="candidate",
fields={"author": "Smith, Jane", "title": "Resolved Work", "year": "2024"},
)
]
verifier.resolver.search_openalex = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.resolver.search_datacite = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.resolver.search_pubmed = lambda title, limit=5: [] # type: ignore[method-assign]
result = verifier.verify_string("Smith 2024", context="citation graphs")
assert result.status != "exact"
def test_llm_json_loader_accepts_fenced_payload():
payload = '```json\n{"title":"Resolved Work","authors":["Smith"],"keywords":["graphs"]}\n```'
result = _loads_lenient_json(payload)
assert result["title"] == "Resolved Work"