LLM verify + fixes + tests
This commit is contained in:
parent
65fde034e1
commit
4894341ba8
|
|
@ -6,3 +6,5 @@ __pycache__/
|
||||||
*.egg-info/
|
*.egg-info/
|
||||||
library.sqlite3
|
library.sqlite3
|
||||||
ops/
|
ops/
|
||||||
|
.codex
|
||||||
|
SESSION_*
|
||||||
|
|
|
||||||
5
Makefile
5
Makefile
|
|
@ -1,7 +1,7 @@
|
||||||
PYTHONPATH_SRC=PYTHONPATH=src
|
PYTHONPATH_SRC=PYTHONPATH=src
|
||||||
VENV_PYTHON=.venv/bin/python
|
VENV_PYTHON=.venv/bin/python
|
||||||
|
|
||||||
.PHONY: test test-live live-smoke validate-talkorigins
|
.PHONY: test test-live live-smoke live-verify-llm-smoke validate-talkorigins
|
||||||
|
|
||||||
test:
|
test:
|
||||||
$(PYTHONPATH_SRC) $(VENV_PYTHON) -m pytest -q
|
$(PYTHONPATH_SRC) $(VENV_PYTHON) -m pytest -q
|
||||||
|
|
@ -12,5 +12,8 @@ test-live:
|
||||||
live-smoke:
|
live-smoke:
|
||||||
CITEGEIST_SOURCE_CACHE=.cache/citegeist $(PYTHONPATH_SRC) $(VENV_PYTHON) scripts/live_smoke.py
|
CITEGEIST_SOURCE_CACHE=.cache/citegeist $(PYTHONPATH_SRC) $(VENV_PYTHON) scripts/live_smoke.py
|
||||||
|
|
||||||
|
live-verify-llm-smoke:
|
||||||
|
$(PYTHONPATH_SRC) $(VENV_PYTHON) scripts/live_verify_llm_smoke.py
|
||||||
|
|
||||||
validate-talkorigins:
|
validate-talkorigins:
|
||||||
$(PYTHONPATH_SRC) $(VENV_PYTHON) -m citegeist validate-talkorigins talkorigins-out/talkorigins_manifest.json
|
$(PYTHONPATH_SRC) $(VENV_PYTHON) -m citegeist validate-talkorigins talkorigins-out/talkorigins_manifest.json
|
||||||
|
|
|
||||||
53
README.md
53
README.md
|
|
@ -172,6 +172,7 @@ PYTHONPATH=src .venv/bin/python -m citegeist compare-extract references.txt --ba
|
||||||
PYTHONPATH=src .venv/bin/python -m citegeist compare-extract references.txt --backend heuristic --backend grobid --summary --output compare-summary.json
|
PYTHONPATH=src .venv/bin/python -m citegeist compare-extract references.txt --backend heuristic --backend grobid --summary --output compare-summary.json
|
||||||
PYTHONPATH=src .venv/bin/python -m citegeist compare-extract references.txt --backend heuristic --backend grobid --summary --max-rows-with-differences 0 --output compare-check.json
|
PYTHONPATH=src .venv/bin/python -m citegeist compare-extract references.txt --backend heuristic --backend grobid --summary --max-rows-with-differences 0 --output compare-check.json
|
||||||
PYTHONPATH=src .venv/bin/python -m citegeist verify --string '"Graph-first bibliography augmentation" Smith 2024' --context "citation graphs" --format json
|
PYTHONPATH=src .venv/bin/python -m citegeist verify --string '"Graph-first bibliography augmentation" Smith 2024' --context "citation graphs" --format json
|
||||||
|
PYTHONPATH=src .venv/bin/python -m citegeist verify --string 'Evans 1960' --context "bottlenose dolphin echolocation" --llm --llm-base-url http://localhost:11434 --llm-model qwen3 --llm-role both --format json
|
||||||
PYTHONPATH=src .venv/bin/python -m citegeist verify --bib draft.bib --output verified.bib
|
PYTHONPATH=src .venv/bin/python -m citegeist verify --bib draft.bib --output verified.bib
|
||||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve smith2024graphs
|
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve smith2024graphs
|
||||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --preview --limit 25
|
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --preview --limit 25
|
||||||
|
|
@ -257,6 +258,58 @@ The built-in extraction backends are:
|
||||||
|
|
||||||
The backend interface exists so future GROBID- or other parser adapters can be registered without replacing the local parser or changing the CLI contract.
|
The backend interface exists so future GROBID- or other parser adapters can be registered without replacing the local parser or changing the CLI contract.
|
||||||
|
|
||||||
|
## LLM-Assisted Verify
|
||||||
|
|
||||||
|
`citegeist verify` can optionally use a local LLM for two bounded tasks:
|
||||||
|
|
||||||
|
- `expand`: infer missing bibliographic clues from free text and context
|
||||||
|
- `rerank`: advisory reranking of already fetched resolver candidates
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
PYTHONPATH=src .venv/bin/python -m citegeist verify \
|
||||||
|
--string 'Evans 1960' \
|
||||||
|
--context "bottlenose dolphin echolocation" \
|
||||||
|
--llm \
|
||||||
|
--llm-base-url http://localhost:11434 \
|
||||||
|
--llm-model qwen3 \
|
||||||
|
--llm-role both \
|
||||||
|
--format json
|
||||||
|
```
|
||||||
|
|
||||||
|
Supported local endpoint styles:
|
||||||
|
|
||||||
|
- OpenAI-compatible APIs such as `http://localhost:11434/v1`
|
||||||
|
- Ollama native chat APIs such as `http://localhost:11434`
|
||||||
|
|
||||||
|
For the current local GenieHive setup, this also works directly:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
PYTHONPATH=src .venv/bin/python -m citegeist verify \
|
||||||
|
--string 'Evans 1960' \
|
||||||
|
--context "bottlenose dolphin echolocation" \
|
||||||
|
--llm \
|
||||||
|
--llm-base-url http://127.0.0.1:8800/v1 \
|
||||||
|
--llm-api-key change-me-client-key \
|
||||||
|
--llm-model general_assistant \
|
||||||
|
--llm-role both \
|
||||||
|
--format json
|
||||||
|
```
|
||||||
|
|
||||||
|
There is also a local smoke script for the LLM helper path alone:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
make live-verify-llm-smoke
|
||||||
|
```
|
||||||
|
|
||||||
|
Safety constraints:
|
||||||
|
|
||||||
|
- the LLM is never trusted to supply DOIs or other identifiers
|
||||||
|
- the LLM only fills missing query clues or suggests candidate order
|
||||||
|
- `exact` status still requires verified resolver evidence, not LLM output
|
||||||
|
- if the LLM fails or returns unusable JSON, `verify` falls back to the normal resolver-only path
|
||||||
|
|
||||||
To compare backend output on the same plaintext references, use `compare-extract`. It aligns entries by ordinal/reference block and emits JSON with per-backend payloads plus a `differing_fields` summary for each row. Add `--summary` when you want a compact evaluation artifact with disagreement counts by field and backend presence counts instead of the full row-by-row payload. Add `--max-rows-with-differences` and/or `--max-field-difference-count` when you want CI-style failure thresholds; the command will emit the summary JSON and return a nonzero exit code if the limits are exceeded.
|
To compare backend output on the same plaintext references, use `compare-extract`. It aligns entries by ordinal/reference block and emits JSON with per-backend payloads plus a `differing_fields` summary for each row. Add `--summary` when you want a compact evaluation artifact with disagreement counts by field and backend presence counts instead of the full row-by-row payload. Add `--max-rows-with-differences` and/or `--max-field-difference-count` when you want CI-style failure thresholds; the command will emit the summary JSON and return a nonzero exit code if the limits are exceeded.
|
||||||
|
|
||||||
For regression-oriented parser work, keep a small curated plaintext fixture set and run `compare-extract` against multiple backends before changing heuristics. That makes backend disagreement explicit and gives you a stable review artifact for parser changes.
|
For regression-oriented parser work, keep a small curated plaintext fixture set and run `compare-extract` against multiple backends before changing heuristics. That makes backend disagreement explicit and gives you a stable review artifact for parser changes.
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,97 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
from citegeist.bibtex import BibEntry
|
||||||
|
from citegeist.llm_verify import VerificationLlmClient, VerificationLlmConfig
|
||||||
|
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the live LLM verify smoke script.

    Every option takes its default from a ``CITEGEIST_VERIFY_LLM_*``
    environment variable when that variable is set, and from a built-in
    literal otherwise.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    env = os.environ.get
    # (flag, keyword arguments) pairs, registered in this order.
    option_specs = (
        (
            "--base-url",
            {
                "default": env("CITEGEIST_VERIFY_LLM_BASE_URL", "http://127.0.0.1:8800/v1"),
                "help": "OpenAI-compatible or Ollama base URL",
            },
        ),
        (
            "--model",
            {
                "default": env("CITEGEIST_VERIFY_LLM_MODEL", "general_assistant"),
                "help": "Model or route ID exposed by the local endpoint",
            },
        ),
        (
            "--api-key",
            {
                "default": env("CITEGEIST_VERIFY_LLM_API_KEY", "change-me-client-key"),
                "help": "Optional API key for the local endpoint",
            },
        ),
        (
            "--provider",
            {
                "default": env("CITEGEIST_VERIFY_LLM_PROVIDER", "auto"),
                "choices": ["auto", "openai", "ollama-native"],
                "help": "Endpoint protocol style",
            },
        ),
    )

    smoke_parser = argparse.ArgumentParser(
        description="Run live LLM verify smoke checks against a local OpenAI-compatible endpoint"
    )
    for flag, options in option_specs:
        smoke_parser.add_argument(flag, **options)
    return smoke_parser
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Run one query-analysis call and one rerank call against the configured LLM.

    The JSON report (endpoint, model and both raw helper results) is always
    printed to stdout. Both client helpers report failure by returning
    ``None`` rather than raising, so a ``None`` result is treated as a failed
    smoke check here.

    Returns:
        0 when both helper calls produced a usable result, 1 otherwise.
    """
    # Local import: the script's top-level imports do not include sys.
    import sys

    args = build_parser().parse_args()
    client = VerificationLlmClient()
    config = VerificationLlmConfig(
        base_url=args.base_url,
        model=args.model,
        api_key=args.api_key,
        provider=args.provider,
        role="both",
    )

    analysis = client.analyze_query(
        config,
        "Evans 1960",
        "marine mammals; bottlenose dolphin echolocation",
    )
    rerank = client.rerank_candidates(
        config,
        {"title": "", "authors": ["Evans"], "year": "1960", "venue": ""},
        "bottlenose dolphin echolocation",
        [
            BibEntry(
                entry_type="article",
                citation_key="candidate_a",
                fields={
                    "author": "Doe, Jane",
                    "title": "General Marine Biology Survey",
                    "year": "1960",
                    "journal": "Marine Science",
                },
            ),
            BibEntry(
                entry_type="article",
                citation_key="candidate_b",
                fields={
                    "author": "Evans, William",
                    "title": "Echolocation by marine dolphins",
                    "year": "1960",
                    "journal": "Journal of the Acoustical Society",
                },
            ),
        ],
    )

    print(
        json.dumps(
            {
                "base_url": args.base_url,
                "model": args.model,
                "analysis": analysis,
                "rerank": rerank,
            },
            indent=2,
            sort_keys=True,
        )
    )

    # A smoke check that cannot fail is useless: surface helper failures
    # through the exit status so `make live-verify-llm-smoke` can detect them.
    failed_steps = [
        step_name
        for step_name, outcome in (("analysis", analysis), ("rerank", rerank))
        if outcome is None
    ]
    if failed_steps:
        print(
            f"LLM smoke check failed: no usable result for {', '.join(failed_steps)}",
            file=sys.stderr,
        )
        return 1
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
# Run the smoke check only when executed as a script; the integer returned by
# main() becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
|
|
@ -13,6 +13,7 @@ from .extract import (
|
||||||
summarize_extraction_comparison,
|
summarize_extraction_comparison,
|
||||||
)
|
)
|
||||||
from .harvest import OaiMetadataFormat, OaiPmhHarvester, OaiSet
|
from .harvest import OaiMetadataFormat, OaiPmhHarvester, OaiSet
|
||||||
|
from .llm_verify import VerificationLlmClient, VerificationLlmConfig
|
||||||
from .resolve import MetadataResolver, merge_entries, merge_entries_with_conflicts
|
from .resolve import MetadataResolver, merge_entries, merge_entries_with_conflicts
|
||||||
from .sources import SourceClient
|
from .sources import SourceClient
|
||||||
from .storage import BibliographyStore
|
from .storage import BibliographyStore
|
||||||
|
|
@ -34,6 +35,8 @@ __all__ = [
|
||||||
"OaiMetadataFormat",
|
"OaiMetadataFormat",
|
||||||
"OaiSet",
|
"OaiSet",
|
||||||
"SourceClient",
|
"SourceClient",
|
||||||
|
"VerificationLlmClient",
|
||||||
|
"VerificationLlmConfig",
|
||||||
"VerificationMatch",
|
"VerificationMatch",
|
||||||
"VerificationResult",
|
"VerificationResult",
|
||||||
"available_extraction_backends",
|
"available_extraction_backends",
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,7 @@ from .extract import (
|
||||||
summarize_extraction_comparison,
|
summarize_extraction_comparison,
|
||||||
)
|
)
|
||||||
from .harvest import OaiPmhHarvester
|
from .harvest import OaiPmhHarvester
|
||||||
|
from .llm_verify import VerificationLlmConfig
|
||||||
from .resolve import MetadataResolver, merge_entries_with_conflicts
|
from .resolve import MetadataResolver, merge_entries_with_conflicts
|
||||||
from .storage import BibliographyStore
|
from .storage import BibliographyStore
|
||||||
from .verify import BibliographyVerifier, render_verification_results
|
from .verify import BibliographyVerifier, render_verification_results
|
||||||
|
|
@ -145,6 +146,22 @@ def build_parser() -> argparse.ArgumentParser:
|
||||||
verify_group.add_argument("--bib", help="Path to a BibTeX file whose entries should be verified")
|
verify_group.add_argument("--bib", help="Path to a BibTeX file whose entries should be verified")
|
||||||
verify_parser.add_argument("--context", default="", help="Optional topic context used for scoring")
|
verify_parser.add_argument("--context", default="", help="Optional topic context used for scoring")
|
||||||
verify_parser.add_argument("--limit", type=int, default=5, help="Maximum candidates to inspect per input")
|
verify_parser.add_argument("--limit", type=int, default=5, help="Maximum candidates to inspect per input")
|
||||||
|
verify_parser.add_argument("--llm", action="store_true", help="Enable optional local LLM assistance for verify")
|
||||||
|
verify_parser.add_argument("--llm-base-url", help="OpenAI-compatible or Ollama base URL for local LLM assistance")
|
||||||
|
verify_parser.add_argument("--llm-model", help="Model ID for local LLM assistance")
|
||||||
|
verify_parser.add_argument("--llm-api-key", default="", help="Optional API key for the LLM endpoint")
|
||||||
|
verify_parser.add_argument(
|
||||||
|
"--llm-provider",
|
||||||
|
choices=["auto", "openai", "ollama-native"],
|
||||||
|
default="auto",
|
||||||
|
help="LLM API style; auto treats `/v1` endpoints as OpenAI-compatible",
|
||||||
|
)
|
||||||
|
verify_parser.add_argument(
|
||||||
|
"--llm-role",
|
||||||
|
choices=["expand", "rerank", "both"],
|
||||||
|
default="both",
|
||||||
|
help="Use the local LLM for query-clue extraction, candidate reranking, or both",
|
||||||
|
)
|
||||||
verify_parser.add_argument(
|
verify_parser.add_argument(
|
||||||
"--format",
|
"--format",
|
||||||
choices=["bibtex", "json"],
|
choices=["bibtex", "json"],
|
||||||
|
|
@ -715,7 +732,21 @@ def main(argv: list[str] | None = None) -> int:
|
||||||
args.output,
|
args.output,
|
||||||
)
|
)
|
||||||
if args.command == "verify":
|
if args.command == "verify":
|
||||||
return _run_verify(args.string, args.list_input, args.bib, args.context, args.limit, args.format, args.output)
|
return _run_verify(
|
||||||
|
args.string,
|
||||||
|
args.list_input,
|
||||||
|
args.bib,
|
||||||
|
args.context,
|
||||||
|
args.limit,
|
||||||
|
args.format,
|
||||||
|
args.output,
|
||||||
|
llm_enabled=args.llm,
|
||||||
|
llm_base_url=args.llm_base_url,
|
||||||
|
llm_model=args.llm_model,
|
||||||
|
llm_api_key=args.llm_api_key,
|
||||||
|
llm_provider=args.llm_provider,
|
||||||
|
llm_role=args.llm_role,
|
||||||
|
)
|
||||||
if args.command == "resolve":
|
if args.command == "resolve":
|
||||||
return _run_resolve(store, args.citation_keys)
|
return _run_resolve(store, args.citation_keys)
|
||||||
if args.command == "resolve-stubs":
|
if args.command == "resolve-stubs":
|
||||||
|
|
@ -750,8 +781,6 @@ def main(argv: list[str] | None = None) -> int:
|
||||||
args.rounds,
|
args.rounds,
|
||||||
args.recent_years,
|
args.recent_years,
|
||||||
args.target_recent_entries,
|
args.target_recent_entries,
|
||||||
args.max_expanded_entries,
|
|
||||||
args.max_expand_seconds,
|
|
||||||
)
|
)
|
||||||
if args.command == "set-topic-phrase":
|
if args.command == "set-topic-phrase":
|
||||||
return _run_set_topic_phrase(store, args.topic_slug, args.phrase, args.clear)
|
return _run_set_topic_phrase(store, args.topic_slug, args.phrase, args.clear)
|
||||||
|
|
@ -785,6 +814,8 @@ def main(argv: list[str] | None = None) -> int:
|
||||||
args.expansion_rounds,
|
args.expansion_rounds,
|
||||||
args.recent_years,
|
args.recent_years,
|
||||||
args.target_recent_entries,
|
args.target_recent_entries,
|
||||||
|
args.max_expanded_entries,
|
||||||
|
args.max_expand_seconds,
|
||||||
)
|
)
|
||||||
if args.command == "bootstrap-batch":
|
if args.command == "bootstrap-batch":
|
||||||
return _run_bootstrap_batch(store, Path(args.input))
|
return _run_bootstrap_batch(store, Path(args.input))
|
||||||
|
|
@ -1121,8 +1152,27 @@ def _run_verify(
|
||||||
limit: int,
|
limit: int,
|
||||||
output_format: str,
|
output_format: str,
|
||||||
output: str | None,
|
output: str | None,
|
||||||
|
*,
|
||||||
|
llm_enabled: bool = False,
|
||||||
|
llm_base_url: str | None = None,
|
||||||
|
llm_model: str | None = None,
|
||||||
|
llm_api_key: str = "",
|
||||||
|
llm_provider: str = "auto",
|
||||||
|
llm_role: str = "both",
|
||||||
) -> int:
|
) -> int:
|
||||||
verifier = BibliographyVerifier()
|
llm_config = None
|
||||||
|
if llm_enabled:
|
||||||
|
if not llm_base_url or not llm_model:
|
||||||
|
print("--llm requires --llm-base-url and --llm-model", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
llm_config = VerificationLlmConfig(
|
||||||
|
base_url=llm_base_url,
|
||||||
|
model=llm_model,
|
||||||
|
api_key=llm_api_key,
|
||||||
|
provider=llm_provider,
|
||||||
|
role=llm_role,
|
||||||
|
)
|
||||||
|
verifier = BibliographyVerifier(llm_config=llm_config)
|
||||||
if string_input is not None:
|
if string_input is not None:
|
||||||
results = [verifier.verify_string(string_input, context=context, limit=limit)]
|
results = [verifier.verify_string(string_input, context=context, limit=limit)]
|
||||||
elif list_input is not None:
|
elif list_input is not None:
|
||||||
|
|
|
||||||
|
|
@ -196,7 +196,7 @@ class OpenAlexExpander:
|
||||||
)
|
)
|
||||||
results.append(
|
results.append(
|
||||||
ExpansionResult(
|
ExpansionResult(
|
||||||
source_citation_key=citation_key,
|
source_citation_key=source_key,
|
||||||
discovered_citation_key=existing_key or discovered.citation_key,
|
discovered_citation_key=existing_key or discovered.citation_key,
|
||||||
created_entry=created,
|
created_entry=created,
|
||||||
relation_type=relation_type,
|
relation_type=relation_type,
|
||||||
|
|
@ -335,7 +335,7 @@ class TopicExpander:
|
||||||
assigned_to_topic=assigned,
|
assigned_to_topic=assigned,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
if target_recent_entries is not None and len(recent_topic_hits) >= target_recent_entries:
|
if target_recent_entries is not None and len(recent_hits) >= target_recent_entries:
|
||||||
self.last_run_meta.update({
|
self.last_run_meta.update({
|
||||||
"stop_reason": "target_recent_entries",
|
"stop_reason": "target_recent_entries",
|
||||||
"recent_hits": len(recent_hits),
|
"recent_hits": len(recent_hits),
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,218 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import urllib.request
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Callable
|
||||||
|
|
||||||
|
from .bibtex import BibEntry
|
||||||
|
|
||||||
|
|
||||||
|
# System message sent as the first chat message of every verification LLM
# request, for both the OpenAI-compatible and the Ollama-native code paths.
DEFAULT_SYSTEM_PROMPT = (
    "You are a meticulous bibliography verification assistant. "
    "You never invent DOIs, page ranges, venues, or identifiers. "
    "You may only suggest missing clues from the provided input and context. "
    "When uncertain, return null or an empty list. "
    "Always respond with strict JSON matching the requested shape."
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class VerificationLlmConfig:
|
||||||
|
base_url: str
|
||||||
|
model: str
|
||||||
|
api_key: str = ""
|
||||||
|
provider: str = "auto"
|
||||||
|
role: str = "both"
|
||||||
|
|
||||||
|
def enabled_for(self, capability: str) -> bool:
|
||||||
|
return bool(self.base_url and self.model) and self.role in {capability, "both"}
|
||||||
|
|
||||||
|
|
||||||
|
class VerificationLlmClient:
    """JSON-over-HTTP chat client for the optional LLM assistance in verify.

    Two bounded tasks are supported: extracting bibliographic clues from free
    text (``analyze_query``) and advisory reranking of already fetched
    candidates (``rerank_candidates``). Both return ``None`` whenever the LLM
    is disabled for that task, the request fails, or the reply is unusable,
    so callers can fall back to the resolver-only path.
    """

    # Upper bound on how many candidates are shown to the model when reranking.
    MAX_RERANK_CANDIDATES = 8

    def __init__(
        self,
        *,
        timeout_s: int = 60,
        post_json: Callable[[str, dict[str, Any], dict[str, str], int], dict[str, Any]] | None = None,
    ) -> None:
        """Create a client.

        Args:
            timeout_s: Timeout in seconds handed to the transport on each call.
            post_json: Transport callable ``(url, body, headers, timeout_s)``
                returning the decoded JSON reply; the module's default HTTP
                transport is used when omitted.
        """
        self.timeout_s = timeout_s
        self._post_json = post_json or _default_post_json

    def analyze_query(
        self,
        config: VerificationLlmConfig,
        free_text: str,
        context: str,
    ) -> dict[str, Any] | None:
        """Ask the model for bibliographic clues missing from ``free_text``.

        Args:
            config: LLM settings; must be enabled for ``"expand"``.
            free_text: The raw citation string being verified.
            context: Optional topic context.

        Returns:
            A dict with ``title``, ``year`` and ``venue`` (string or ``None``)
            plus ``authors`` and ``keywords`` (lists of non-empty strings), or
            ``None`` when the LLM is not enabled for expansion or did not
            return a JSON object.
        """
        if not config.enabled_for("expand"):
            return None
        payload = {
            "task": "extract_bibliographic_clues",
            "input": {"free_text": free_text, "context": context},
            "rules": [
                "Never invent a DOI or identifier.",
                "Only fill clues that plausibly follow from the input and context.",
                "Return null for unknown scalar fields.",
            ],
            "schema": {
                "type": "object",
                "properties": {
                    "title": {"type": ["string", "null"]},
                    "authors": {"type": "array", "items": {"type": "string"}},
                    "year": {"type": ["string", "null"]},
                    "venue": {"type": ["string", "null"]},
                    "keywords": {"type": "array", "items": {"type": "string"}},
                },
                "required": ["authors", "keywords"],
            },
        }
        result = self._chat_json(config, payload)
        if not isinstance(result, dict):
            return None
        # Models do not always honor the schema: "authors": null or a bare
        # string must degrade gracefully instead of raising or being split
        # into single characters.
        return {
            "title": _optional_string(result.get("title")),
            "authors": self._string_list(result.get("authors")),
            "year": _optional_string(result.get("year")),
            "venue": _optional_string(result.get("venue")),
            "keywords": self._string_list(result.get("keywords")),
        }

    def rerank_candidates(
        self,
        config: VerificationLlmConfig,
        query_fields: dict[str, object],
        context: str,
        candidates: list[BibEntry],
    ) -> list[int] | None:
        """Ask the model to order ``candidates`` from best to worst match.

        Only the first ``MAX_RERANK_CANDIDATES`` candidates are sent to the
        model, and only indices into that visible prefix are accepted.

        Args:
            config: LLM settings; must be enabled for ``"rerank"``.
            query_fields: Clues describing the work being looked up.
            context: Optional topic context.
            candidates: Candidate entries in their current order.

        Returns:
            Distinct, in-range candidate indices in the order the model gave
            them, or ``None`` when reranking is disabled, there are no
            candidates, or the reply contained no usable index.
        """
        if not config.enabled_for("rerank") or not candidates:
            return None
        shown = candidates[: self.MAX_RERANK_CANDIDATES]
        payload = {
            "task": "rerank_candidates",
            "instruction": (
                "Return a JSON array of candidate indices sorted best to worst. "
                "Do not invent metadata. Prefer candidates that better match the given clues."
            ),
            "input": {
                "query_fields": query_fields,
                "context": context,
                "candidates": [
                    {
                        "title": entry.fields.get("title", ""),
                        "authors": entry.fields.get("author", "").split(" and ") if entry.fields.get("author") else [],
                        "year": entry.fields.get("year", ""),
                        "venue": entry.fields.get("journal", "") or entry.fields.get("booktitle", ""),
                        "doi": entry.fields.get("doi", ""),
                    }
                    for entry in shown
                ],
            },
        }
        result = self._chat_json(config, payload)
        if not isinstance(result, list):
            return None
        # Accept only genuine ints (bool is an int subclass) that point at a
        # candidate the model was actually shown; dict.fromkeys drops repeats
        # while keeping the model's order.
        indices = list(
            dict.fromkeys(
                value
                for value in result
                if isinstance(value, int) and not isinstance(value, bool) and 0 <= value < len(shown)
            )
        )
        return indices or None

    def _chat_json(self, config: VerificationLlmConfig, payload: dict[str, Any]) -> Any:
        """Send ``payload`` to the configured endpoint and return the parsed JSON reply.

        This is deliberately best-effort: any exception raised while sending
        the request or decoding the reply is swallowed and reported as
        ``None`` so that LLM assistance never breaks the caller.
        """
        try:
            if _llm_mode(config.base_url, config.provider) == "openai":
                return self._chat_openai(config, payload)
            return self._chat_ollama_native(config, payload)
        except Exception:
            return None

    def _chat_openai(self, config: VerificationLlmConfig, payload: dict[str, Any]) -> Any:
        """Call ``<base_url>/chat/completions`` and parse the first choice's content."""
        body = {
            "model": config.model,
            "temperature": 0,
            "messages": self._chat_messages(payload),
        }
        data = self._post_json(
            config.base_url.rstrip("/") + "/chat/completions",
            body,
            self._request_headers(config),
            self.timeout_s,
        )
        content = data["choices"][0]["message"]["content"]
        return _loads_lenient_json(content)

    def _chat_ollama_native(self, config: VerificationLlmConfig, payload: dict[str, Any]) -> Any:
        """Call ``<base_url>/api/chat`` (non-streaming) and parse the message content."""
        # A trailing "/v1" is dropped so the request goes to <root>/api/chat.
        base_url = config.base_url.rstrip("/").removesuffix("/v1")
        body = {
            "model": config.model,
            "messages": self._chat_messages(payload),
            "options": {"temperature": 0},
            "stream": False,
        }
        data = self._post_json(
            base_url + "/api/chat",
            body,
            self._request_headers(config),
            self.timeout_s,
        )
        content = data["message"]["content"]
        return _loads_lenient_json(content)

    @staticmethod
    def _request_headers(config: VerificationLlmConfig) -> dict[str, str]:
        """Build the HTTP headers, adding a bearer token only when an API key is set."""
        headers = {"Content-Type": "application/json"}
        if config.api_key:
            headers["Authorization"] = f"Bearer {config.api_key}"
        return headers

    @staticmethod
    def _chat_messages(payload: dict[str, Any]) -> list[dict[str, str]]:
        """Build the system + user message pair; the user message is the JSON-encoded payload."""
        return [
            {"role": "system", "content": DEFAULT_SYSTEM_PROMPT},
            {"role": "user", "content": json.dumps(payload)},
        ]

    @staticmethod
    def _string_list(value: object) -> list[str]:
        """Coerce a model-supplied value into a list of non-empty, stripped strings.

        A bare string becomes a one-item list; anything that is not a string,
        list or tuple yields an empty list; ``None`` items are skipped.
        """
        if isinstance(value, str):
            value = [value]
        elif not isinstance(value, (list, tuple)):
            return []
        stripped = (str(item).strip() for item in value if item is not None)
        return [text for text in stripped if text]
|
||||||
|
|
||||||
|
|
||||||
|
def _default_post_json(url: str, payload: dict[str, Any], headers: dict[str, str], timeout_s: int) -> dict[str, Any]:
|
||||||
|
request = urllib.request.Request(
|
||||||
|
url,
|
||||||
|
data=json.dumps(payload).encode("utf-8"),
|
||||||
|
headers=headers,
|
||||||
|
method="POST",
|
||||||
|
)
|
||||||
|
with urllib.request.urlopen(request, timeout=timeout_s) as response:
|
||||||
|
return json.loads(response.read().decode("utf-8"))
|
||||||
|
|
||||||
|
|
||||||
|
def _llm_mode(base_url: str, provider: str) -> str:
|
||||||
|
if provider == "openai":
|
||||||
|
return "openai"
|
||||||
|
if provider == "ollama-native":
|
||||||
|
return "ollama-native"
|
||||||
|
return "openai" if base_url.rstrip("/").endswith("/v1") else "ollama-native"
|
||||||
|
|
||||||
|
|
||||||
|
def _optional_string(value: object) -> str | None:
|
||||||
|
text = str(value or "").strip()
|
||||||
|
return text or None
|
||||||
|
|
||||||
|
|
||||||
|
def _loads_lenient_json(content: str) -> Any:
|
||||||
|
try:
|
||||||
|
return json.loads(content)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
fenced = re.search(r"```(?:json)?\s*(\{.*\}|\[.*\])\s*```", content, flags=re.DOTALL)
|
||||||
|
if fenced:
|
||||||
|
return json.loads(fenced.group(1))
|
||||||
|
|
||||||
|
for opener, closer in (("{", "}"), ("[", "]")):
|
||||||
|
start = content.find(opener)
|
||||||
|
end = content.rfind(closer)
|
||||||
|
if start != -1 and end != -1 and end > start:
|
||||||
|
snippet = content[start : end + 1]
|
||||||
|
try:
|
||||||
|
return json.loads(snippet)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
raise ValueError("Model response did not contain parseable JSON")
|
||||||
|
|
@ -246,7 +246,13 @@ class BibliographyStore:
|
||||||
entry.fields.get("isbn"),
|
entry.fields.get("isbn"),
|
||||||
fulltext,
|
fulltext,
|
||||||
raw_bibtex,
|
raw_bibtex,
|
||||||
json.dumps({k: v for k, v in entry.fields.items() if k not in CORE_ENTRY_FIELDS and k not in RELATION_FIELDS}),
|
json.dumps(
|
||||||
|
{
|
||||||
|
k: v
|
||||||
|
for k, v in entry.fields.items()
|
||||||
|
if k not in CORE_ENTRY_FIELDS and k not in RELATION_FIELDS and k not in {"author", "editor"}
|
||||||
|
}
|
||||||
|
),
|
||||||
),
|
),
|
||||||
).fetchone()
|
).fetchone()
|
||||||
entry_id = int(row["id"])
|
entry_id = int(row["id"])
|
||||||
|
|
@ -1142,6 +1148,8 @@ class BibliographyStore:
|
||||||
|
|
||||||
extra_fields = json.loads(row["extra_fields_json"])
|
extra_fields = json.loads(row["extra_fields_json"])
|
||||||
for field_name in sorted(extra_fields):
|
for field_name in sorted(extra_fields):
|
||||||
|
if field_name in {"author", "editor"}:
|
||||||
|
continue
|
||||||
value = extra_fields[field_name]
|
value = extra_fields[field_name]
|
||||||
if value:
|
if value:
|
||||||
fields[field_name] = str(value)
|
fields[field_name] = str(value)
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from .bibtex import BibEntry, parse_bibtex, render_bibtex
|
from .bibtex import BibEntry, parse_bibtex, render_bibtex
|
||||||
|
from .llm_verify import VerificationLlmClient, VerificationLlmConfig
|
||||||
from .resolve import MetadataResolver, Resolution
|
from .resolve import MetadataResolver, Resolution
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -75,8 +76,16 @@ class VerificationResult:
|
||||||
|
|
||||||
|
|
||||||
class BibliographyVerifier:
|
class BibliographyVerifier:
|
||||||
def __init__(self, resolver: MetadataResolver | None = None) -> None:
|
def __init__(
|
||||||
|
self,
|
||||||
|
resolver: MetadataResolver | None = None,
|
||||||
|
*,
|
||||||
|
llm_config: VerificationLlmConfig | None = None,
|
||||||
|
llm_client: VerificationLlmClient | None = None,
|
||||||
|
) -> None:
|
||||||
self.resolver = resolver or MetadataResolver()
|
self.resolver = resolver or MetadataResolver()
|
||||||
|
self.llm_config = llm_config
|
||||||
|
self.llm_client = llm_client or VerificationLlmClient()
|
||||||
|
|
||||||
def verify_string(self, value: str, context: str = "", limit: int = 5) -> VerificationResult:
|
def verify_string(self, value: str, context: str = "", limit: int = 5) -> VerificationResult:
|
||||||
query_fields = _fields_from_string(value)
|
query_fields = _fields_from_string(value)
|
||||||
|
|
@ -164,10 +173,18 @@ class BibliographyVerifier:
|
||||||
input_key=input_key,
|
input_key=input_key,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
query_fields = _clone_query_fields(query_fields)
|
||||||
|
search_query = query
|
||||||
|
if self.llm_config is not None:
|
||||||
|
hints = self.llm_client.analyze_query(self.llm_config, query, context)
|
||||||
|
if hints:
|
||||||
|
_apply_llm_hints(query_fields, hints)
|
||||||
|
search_query = _build_search_query(search_query, hints)
|
||||||
|
|
||||||
candidate_limit = max(1, limit)
|
candidate_limit = max(1, limit)
|
||||||
candidates = self._collect_candidates(
|
candidates = self._collect_candidates(
|
||||||
title=str(query_fields.get("title", "")),
|
title=str(query_fields.get("title", "")),
|
||||||
query=query,
|
query=search_query,
|
||||||
limit=candidate_limit,
|
limit=candidate_limit,
|
||||||
)
|
)
|
||||||
scored = [
|
scored = [
|
||||||
|
|
@ -178,9 +195,21 @@ class BibliographyVerifier:
|
||||||
)
|
)
|
||||||
for entry, source_label in candidates
|
for entry, source_label in candidates
|
||||||
]
|
]
|
||||||
|
llm_ranks = _compute_llm_ranks(
|
||||||
|
self.llm_client.rerank_candidates(
|
||||||
|
self.llm_config,
|
||||||
|
query_fields,
|
||||||
|
context,
|
||||||
|
[match.entry for match in scored],
|
||||||
|
)
|
||||||
|
if self.llm_config is not None
|
||||||
|
else None,
|
||||||
|
scored,
|
||||||
|
)
|
||||||
scored.sort(
|
scored.sort(
|
||||||
key=lambda item: (
|
key=lambda item: (
|
||||||
-item.score,
|
-item.score,
|
||||||
|
llm_ranks.get(item.entry.citation_key, len(scored)),
|
||||||
item.entry.fields.get("year", ""),
|
item.entry.fields.get("year", ""),
|
||||||
item.entry.citation_key,
|
item.entry.citation_key,
|
||||||
)
|
)
|
||||||
|
|
@ -255,6 +284,31 @@ def _fields_from_string(value: str) -> dict[str, object]:
|
||||||
return {"title": title, "authors": authors, "year": year, "venue": ""}
|
return {"title": title, "authors": authors, "year": year, "venue": ""}
|
||||||
|
|
||||||
|
|
||||||
|
def _clone_query_fields(query_fields: dict[str, object]) -> dict[str, object]:
|
||||||
|
cloned = dict(query_fields)
|
||||||
|
authors = cloned.get("authors", [])
|
||||||
|
cloned["authors"] = list(authors) if isinstance(authors, list) else []
|
||||||
|
return cloned
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_llm_hints(query_fields: dict[str, object], hints: dict[str, object]) -> None:
|
||||||
|
if not str(query_fields.get("title", "")).strip() and hints.get("title"):
|
||||||
|
query_fields["title"] = str(hints["title"])
|
||||||
|
if not query_fields.get("authors") and hints.get("authors"):
|
||||||
|
query_fields["authors"] = [str(author) for author in hints["authors"] if str(author).strip()]
|
||||||
|
if not str(query_fields.get("year", "")).strip() and hints.get("year"):
|
||||||
|
query_fields["year"] = str(hints["year"])
|
||||||
|
if not str(query_fields.get("venue", "")).strip() and hints.get("venue"):
|
||||||
|
query_fields["venue"] = str(hints["venue"])
|
||||||
|
|
||||||
|
|
||||||
|
def _build_search_query(query: str, hints: dict[str, object]) -> str:
|
||||||
|
keywords = [str(value).strip() for value in hints.get("keywords", []) if str(value).strip()]
|
||||||
|
if not keywords:
|
||||||
|
return query
|
||||||
|
return " ".join(part for part in [query, " ".join(keywords[:5])] if part).strip()
|
||||||
|
|
||||||
|
|
||||||
def _score_candidate(query_fields: dict[str, object], context: str, entry: BibEntry) -> float:
|
def _score_candidate(query_fields: dict[str, object], context: str, entry: BibEntry) -> float:
|
||||||
score = 0.0
|
score = 0.0
|
||||||
query_title = _tokenize(str(query_fields.get("title", "")))
|
query_title = _tokenize(str(query_fields.get("title", "")))
|
||||||
|
|
@ -371,3 +425,13 @@ def _placeholder_entry(query_fields: dict[str, object], query: str, input_key: s
|
||||||
def _slugify_key(value: str) -> str:
|
def _slugify_key(value: str) -> str:
|
||||||
slug = re.sub(r"[^a-z0-9]+", "", value.lower())
|
slug = re.sub(r"[^a-z0-9]+", "", value.lower())
|
||||||
return slug[:40] or "verification"
|
return slug[:40] or "verification"
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_llm_ranks(order: list[int] | None, matches: list[VerificationMatch]) -> dict[str, int]:
|
||||||
|
if not order:
|
||||||
|
return {}
|
||||||
|
ranks: dict[str, int] = {}
|
||||||
|
for rank, index in enumerate(order):
|
||||||
|
if 0 <= index < len(matches):
|
||||||
|
ranks[matches[index].entry.citation_key] = rank
|
||||||
|
return ranks
|
||||||
|
|
|
||||||
|
|
@ -247,6 +247,71 @@ def test_cli_verify_bib_outputs_json(tmp_path: Path):
|
||||||
assert payload[0]["entry"]["citation_key"] == "candidate2024"
|
assert payload[0]["entry"]["citation_key"] == "candidate2024"
|
||||||
|
|
||||||
|
|
||||||
|
def test_cli_verify_rejects_incomplete_llm_config(tmp_path: Path):
    """``verify --llm`` without a base URL and model exits 1 with a usage message on stderr."""
    argv = [
        "--db",
        str(tmp_path / "library.sqlite3"),
        "verify",
        "--string",
        "Evans 1960",
        "--llm",
    ]
    captured_err = io.StringIO()

    with redirect_stderr(captured_err):
        status = main(argv)

    assert status == 1
    assert "--llm requires --llm-base-url and --llm-model" in captured_err.getvalue()
|
||||||
|
|
||||||
|
|
||||||
|
def test_cli_verify_builds_llm_config(tmp_path: Path):
    """The ``--llm*`` CLI flags reach the verifier constructor as an ``llm_config`` keyword."""
    from citegeist.bibtex import BibEntry
    from citegeist.verify import VerificationResult

    db_path = tmp_path / "library.sqlite3"
    canned_result = VerificationResult(
        query="Evans 1960",
        context="marine mammals",
        status="ambiguous",
        confidence=0.6,
        entry=BibEntry(entry_type="misc", citation_key="evans1960", fields={"title": "Evans 1960"}),
        source_label="none",
        alternates=[],
        input_type="string",
        input_key=None,
    )
    argv = [
        "--db",
        str(db_path),
        "verify",
        "--string",
        "Evans 1960",
        "--llm",
        "--llm-base-url",
        "http://localhost:11434",
        "--llm-model",
        "qwen3",
        "--llm-role",
        "rerank",
        "--format",
        "json",
    ]

    # Patch the verifier class so no network or database work happens.
    with patch("citegeist.cli.BibliographyVerifier") as verifier_cls:
        verifier_cls.return_value.verify_string.return_value = canned_result
        with redirect_stdout(io.StringIO()):
            status = main(argv)

    assert status == 0
    llm_config = verifier_cls.call_args.kwargs["llm_config"]
    assert llm_config.base_url == "http://localhost:11434"
    assert llm_config.model == "qwen3"
    assert llm_config.role == "rerank"
|
||||||
|
|
||||||
|
|
||||||
def test_cli_sync_jabref_ingests_resolves_and_exports(tmp_path: Path):
|
def test_cli_sync_jabref_ingests_resolves_and_exports(tmp_path: Path):
|
||||||
bib_path = tmp_path / "jabref-library.bib"
|
bib_path = tmp_path / "jabref-library.bib"
|
||||||
bib_path.write_text(
|
bib_path.write_text(
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,78 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from citegeist.bibtex import BibEntry
|
||||||
|
from citegeist.llm_verify import VerificationLlmClient, VerificationLlmConfig
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.live
|
||||||
|
|
||||||
|
|
||||||
|
def _live_llm_config() -> VerificationLlmConfig:
    """Build the live-test LLM config from ``CITEGEIST_VERIFY_LLM_*`` environment variables.

    Unset variables fall back to the defaults listed below; the role is always ``"both"``.
    """
    env = os.environ.get
    settings = {
        "base_url": env("CITEGEIST_VERIFY_LLM_BASE_URL", "http://127.0.0.1:8800/v1"),
        "model": env("CITEGEIST_VERIFY_LLM_MODEL", "general_assistant"),
        "api_key": env("CITEGEIST_VERIFY_LLM_API_KEY", "change-me-client-key"),
        "provider": env("CITEGEIST_VERIFY_LLM_PROVIDER", "auto"),
    }
    return VerificationLlmConfig(role="both", **settings)
|
||||||
|
|
||||||
|
|
||||||
|
def test_live_llm_query_analysis_via_geniehive():
    """Live check: query analysis returns list-typed ``authors`` and ``keywords``.

    Skips when the client returns ``None`` instead of parsed hints.
    """
    hints = VerificationLlmClient().analyze_query(
        _live_llm_config(),
        "Evans 1960",
        "marine mammals; bottlenose dolphin echolocation",
    )

    if hints is None:
        pytest.skip("local GenieHive route did not return parseable JSON for query analysis")

    for field_name in ("authors", "keywords"):
        assert isinstance(hints[field_name], list)
|
||||||
|
|
||||||
|
|
||||||
|
def test_live_llm_candidate_rerank_via_geniehive():
    """Live check: candidate reranking returns a non-empty list of integer indices.

    Skips when the client returns ``None`` instead of a parsed ordering.
    """

    def _article(key: str, author: str, title: str, journal: str) -> BibEntry:
        return BibEntry(
            entry_type="article",
            citation_key=key,
            fields={"author": author, "title": title, "year": "1960", "journal": journal},
        )

    llm = VerificationLlmClient()
    pool = [
        _article("candidate_a", "Doe, Jane", "General Marine Biology Survey", "Marine Science"),
        _article(
            "candidate_b",
            "Evans, William",
            "Echolocation by marine dolphins",
            "Journal of the Acoustical Society",
        ),
    ]
    query_fields = {"title": "", "authors": ["Evans"], "year": "1960", "venue": ""}

    ordering = llm.rerank_candidates(
        _live_llm_config(),
        query_fields,
        "bottlenose dolphin echolocation",
        pool,
    )

    if ordering is None:
        pytest.skip("local GenieHive route did not return parseable JSON for candidate reranking")

    assert ordering
    assert all(isinstance(position, int) for position in ordering)
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from citegeist.bibtex import BibEntry
|
from citegeist.bibtex import BibEntry
|
||||||
|
from citegeist.llm_verify import VerificationLlmConfig, _loads_lenient_json
|
||||||
from citegeist.resolve import Resolution
|
from citegeist.resolve import Resolution
|
||||||
from citegeist.verify import BibliographyVerifier
|
from citegeist.verify import BibliographyVerifier
|
||||||
|
|
||||||
|
|
@ -120,3 +121,102 @@ def test_verification_result_to_bib_entry_contains_audit_fields():
|
||||||
|
|
||||||
assert bib_entry.fields["x_status"] == "not_found"
|
assert bib_entry.fields["x_status"] == "not_found"
|
||||||
assert bib_entry.fields["x_query"] == "Missing Work"
|
assert bib_entry.fields["x_query"] == "Missing Work"
|
||||||
|
|
||||||
|
|
||||||
|
def test_verifier_llm_expand_only_fills_missing_fields():
    """The title returned by query analysis is what the Crossref search stub receives."""

    class _StubLlm:
        def analyze_query(self, config, query, context):
            return {
                "title": "Expanded Title",
                "authors": ["Smith"],
                "year": "2024",
                "venue": "Journal of Tests",
                "keywords": ["echolocation", "marine"],
            }

        def rerank_candidates(self, config, query_fields, context, candidates):
            return None

    expand_config = VerificationLlmConfig(base_url="http://localhost:11434", model="qwen", role="expand")
    verifier = BibliographyVerifier(llm_config=expand_config, llm_client=_StubLlm())

    searched_titles: list[str] = []

    def _record_crossref(title, limit=5):
        # Capture the title handed to Crossref and report no candidates.
        searched_titles.append(title)
        return []

    verifier.resolver.search_crossref = _record_crossref  # type: ignore[method-assign]
    for source in ("search_openalex", "search_datacite", "search_pubmed"):
        setattr(verifier.resolver, source, lambda title, limit=5: [])

    verifier.verify_string("Evans 1960", context="bottlenose dolphin echolocation")

    assert searched_titles == ["Expanded Title"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_verifier_llm_rerank_only_breaks_score_ties():
    """With the stub ordering ``[1, 0]``, ``beta`` is the result and ``alpha`` the first alternate."""

    class _StubLlm:
        def analyze_query(self, config, query, context):
            return None

        def rerank_candidates(self, config, query_fields, context, candidates):
            return [1, 0]

    rerank_config = VerificationLlmConfig(base_url="http://localhost:11434", model="qwen", role="rerank")
    verifier = BibliographyVerifier(llm_config=rerank_config, llm_client=_StubLlm())

    def _crossref_hits(title, limit=5):
        # Two near-identical candidates, rebuilt on every call.
        return [
            BibEntry(
                entry_type="article",
                citation_key=key,
                fields={"author": "Smith, Jane", "title": entry_title, "year": "2024"},
            )
            for key, entry_title in (
                ("alpha", "Shared Match Primary"),
                ("beta", "Shared Match Secondary"),
            )
        ]

    verifier.resolver.search_crossref = _crossref_hits  # type: ignore[method-assign]
    for source in ("search_openalex", "search_datacite", "search_pubmed"):
        setattr(verifier.resolver, source, lambda title, limit=5: [])

    outcome = verifier.verify_string('"Shared Match" Smith 2024')

    assert outcome.entry.citation_key == "beta"
    assert outcome.alternates[0].entry.citation_key == "alpha"
|
||||||
|
|
||||||
|
|
||||||
|
def test_verifier_llm_cannot_create_exact_without_verified_doi():
    """LLM-supplied fields matching a DOI-less candidate must not yield an ``exact`` status."""

    class _StubLlm:
        def analyze_query(self, config, query, context):
            return {"title": "Resolved Work", "authors": ["Smith"], "year": "2024", "venue": None, "keywords": []}

        def rerank_candidates(self, config, query_fields, context, candidates):
            return None

    expand_config = VerificationLlmConfig(base_url="http://localhost:11434", model="qwen", role="expand")
    verifier = BibliographyVerifier(llm_config=expand_config, llm_client=_StubLlm())

    def _crossref_hits(title, limit=5):
        # Single candidate whose fields carry no DOI.
        return [
            BibEntry(
                entry_type="article",
                citation_key="candidate",
                fields={"author": "Smith, Jane", "title": "Resolved Work", "year": "2024"},
            )
        ]

    verifier.resolver.search_crossref = _crossref_hits  # type: ignore[method-assign]
    for source in ("search_openalex", "search_datacite", "search_pubmed"):
        setattr(verifier.resolver, source, lambda title, limit=5: [])

    outcome = verifier.verify_string("Smith 2024", context="citation graphs")

    assert outcome.status != "exact"
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_json_loader_accepts_fenced_payload():
    """A JSON object wrapped in a Markdown ``json`` code fence is parsed by the lenient loader."""
    fenced = '```json\n{"title":"Resolved Work","authors":["Smith"],"keywords":["graphs"]}\n```'

    parsed = _loads_lenient_json(fenced)

    assert parsed["title"] == "Resolved Work"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue