# Source listing metadata: 223 lines, 8.4 KiB, Python.
from __future__ import annotations
|
|
|
|
from citegeist.bibtex import BibEntry
|
|
from citegeist.llm_verify import VerificationLlmConfig, _loads_lenient_json
|
|
from citegeist.resolve import Resolution
|
|
from citegeist.verify import BibliographyVerifier
|
|
|
|
|
|
def test_verifier_uses_direct_doi_resolution_for_bib_entries():
    """A bib entry carrying a DOI is verified through ``resolver.resolve_doi``.

    ``resolve_doi`` is replaced with a stub returning a canned ``Resolution``;
    the test checks that the stubbed record comes back as an exact match.
    """

    def _fake_resolve_doi(value):
        # Echo the requested DOI into both the entry and the source label so
        # the assertions can confirm which identifier was looked up.
        resolved_entry = BibEntry(
            entry_type="article",
            citation_key="doi101000example",
            fields={
                "author": "Smith, Jane",
                "title": "Resolved Work",
                "year": "2024",
                "doi": value,
            },
        )
        return Resolution(
            entry=resolved_entry,
            source_type="resolver",
            source_label=f"crossref:doi:{value}",
        )

    verifier = BibliographyVerifier()
    verifier.resolver.resolve_doi = _fake_resolve_doi  # type: ignore[method-assign]

    seed_entry = BibEntry(
        entry_type="misc",
        citation_key="seed2024",
        fields={"title": "Rough Work", "doi": "10.1000/example"},
    )
    result = verifier.verify_bib_entry(seed_entry)

    assert result.status == "exact"
    assert result.confidence == 1.0
    # The resolved title must replace the rough title from the seed entry.
    assert result.entry.fields["title"] == "Resolved Work"
    assert result.source_label == "crossref:doi:10.1000/example"
|
|
|
|
|
|
def test_verifier_uses_direct_pmid_resolution_for_bib_entries():
    """A bib entry carrying a PMID is verified through ``resolver.resolve_pmid``.

    ``resolve_pmid`` is replaced with a stub returning a canned ``Resolution``;
    the test checks that the stubbed record comes back as an exact match.
    """

    def _fake_resolve_pmid(value):
        # Echo the requested PMID into both the entry and the source label so
        # the assertions can confirm which identifier was looked up.
        resolved_entry = BibEntry(
            entry_type="article",
            citation_key="pmid12345678",
            fields={
                "author": "Smith, Jane",
                "title": "Resolved PubMed Work",
                "year": "2024",
                "pmid": value,
            },
        )
        return Resolution(
            entry=resolved_entry,
            source_type="resolver",
            source_label=f"pubmed:pmid:{value}",
        )

    verifier = BibliographyVerifier()
    verifier.resolver.resolve_pmid = _fake_resolve_pmid  # type: ignore[method-assign]

    seed_entry = BibEntry(
        entry_type="misc",
        citation_key="seed2024",
        fields={"title": "Rough Work", "pmid": "12345678"},
    )
    result = verifier.verify_bib_entry(seed_entry)

    assert result.status == "exact"
    assert result.confidence == 1.0
    # The resolved title must replace the rough title from the seed entry.
    assert result.entry.fields["title"] == "Resolved PubMed Work"
    assert result.source_label == "pubmed:pmid:12345678"
|
|
|
|
|
|
def test_verifier_scores_and_sorts_search_candidates():
    """The closest search candidate wins and the other is listed as an alternate.

    Only the Crossref search stub returns candidates; the OpenAlex, DataCite
    and PubMed stubs return nothing.
    """

    def _crossref_candidates(title, limit=5):
        # Build fresh entries on every call, as the original stub did.
        strong = BibEntry(
            entry_type="article",
            citation_key="goodmatch",
            fields={
                "author": "Smith, Jane",
                "title": "Graph-first bibliography augmentation",
                "year": "2024",
                "doi": "10.1000/good",
            },
        )
        weak = BibEntry(
            entry_type="article",
            citation_key="weaker",
            fields={
                "author": "Doe, Alex",
                "title": "Graph search methods",
                "year": "2023",
            },
        )
        return [strong, weak]

    def _no_candidates(title, limit=5):
        return []

    verifier = BibliographyVerifier()
    verifier.resolver.search_crossref = _crossref_candidates  # type: ignore[method-assign]
    verifier.resolver.search_openalex = _no_candidates  # type: ignore[method-assign]
    verifier.resolver.search_datacite = _no_candidates  # type: ignore[method-assign]
    verifier.resolver.search_pubmed = _no_candidates  # type: ignore[method-assign]

    result = verifier.verify_string('"Graph-first bibliography augmentation" Smith 2024')

    assert result.entry.citation_key == "goodmatch"
    assert result.status in {"high_confidence", "exact"}
    assert result.alternates[0].entry.citation_key == "weaker"
|
|
|
|
|
|
def test_verification_result_to_bib_entry_contains_audit_fields():
    """``to_bib_entry()`` on a not-found result carries ``x_status`` and ``x_query``.

    Every search backend is stubbed to return no candidates, so the query
    ends up with the ``not_found`` status asserted below.
    """

    def _no_candidates(title, limit=5):
        return []

    verifier = BibliographyVerifier()
    for backend in ("search_crossref", "search_openalex", "search_datacite", "search_pubmed"):
        setattr(verifier.resolver, backend, _no_candidates)

    query_fields = {"title": "Missing Work", "authors": [], "year": "", "venue": ""}
    result = verifier._verify_query(  # type: ignore[attr-defined]
        query_fields,
        query="Missing Work",
        context="",
        limit=1,
        input_type="string",
    )

    audit_fields = result.to_bib_entry().fields

    assert audit_fields["x_status"] == "not_found"
    assert audit_fields["x_query"] == "Missing Work"
|
|
|
|
|
|
def test_verifier_llm_expand_only_fills_missing_fields():
    """With ``role="expand"``, Crossref is searched with the LLM-supplied title.

    The query string ``"Evans 1960"`` goes in without the title the fake LLM
    returns; the test asserts that Crossref is searched exactly once, with
    that title.
    """

    class _FakeLlmClient:
        def analyze_query(self, config, query, context):
            expansion = {
                "title": "Expanded Title",
                "authors": ["Smith"],
                "year": "2024",
                "venue": "Journal of Tests",
                "keywords": ["echolocation", "marine"],
            }
            return expansion

        def rerank_candidates(self, config, query_fields, context, candidates):
            return None

    seen_titles: list[str] = []

    def _recording_crossref(title, limit=5):
        # Record what the verifier searched for; never return candidates.
        seen_titles.append(title)
        return []

    def _no_candidates(title, limit=5):
        return []

    llm_config = VerificationLlmConfig(
        base_url="http://localhost:11434",
        model="qwen",
        role="expand",
    )
    verifier = BibliographyVerifier(llm_config=llm_config, llm_client=_FakeLlmClient())
    verifier.resolver.search_crossref = _recording_crossref  # type: ignore[method-assign]
    verifier.resolver.search_openalex = _no_candidates  # type: ignore[method-assign]
    verifier.resolver.search_datacite = _no_candidates  # type: ignore[method-assign]
    verifier.resolver.search_pubmed = _no_candidates  # type: ignore[method-assign]

    verifier.verify_string("Evans 1960", context="bottlenose dolphin echolocation")

    assert seen_titles == ["Expanded Title"]
|
|
|
|
|
|
def test_verifier_llm_rerank_only_breaks_score_ties():
    """With ``role="rerank"``, the LLM's ordering decides between two candidates.

    The two Crossref candidates share author and year and differ only in the
    last word of the title (presumably an equal score, per the test name).
    The fake LLM answers ``[1, 0]``, and the second candidate is expected to
    come out on top.
    """

    class _FakeLlmClient:
        def analyze_query(self, config, query, context):
            return None

        def rerank_candidates(self, config, query_fields, context, candidates):
            # Prefer the second candidate over the first.
            return [1, 0]

    def _crossref_candidates(title, limit=5):
        alpha = BibEntry(
            entry_type="article",
            citation_key="alpha",
            fields={"author": "Smith, Jane", "title": "Shared Match Primary", "year": "2024"},
        )
        beta = BibEntry(
            entry_type="article",
            citation_key="beta",
            fields={"author": "Smith, Jane", "title": "Shared Match Secondary", "year": "2024"},
        )
        return [alpha, beta]

    def _no_candidates(title, limit=5):
        return []

    llm_config = VerificationLlmConfig(
        base_url="http://localhost:11434",
        model="qwen",
        role="rerank",
    )
    verifier = BibliographyVerifier(llm_config=llm_config, llm_client=_FakeLlmClient())
    verifier.resolver.search_crossref = _crossref_candidates  # type: ignore[method-assign]
    verifier.resolver.search_openalex = _no_candidates  # type: ignore[method-assign]
    verifier.resolver.search_datacite = _no_candidates  # type: ignore[method-assign]
    verifier.resolver.search_pubmed = _no_candidates  # type: ignore[method-assign]

    result = verifier.verify_string('"Shared Match" Smith 2024')

    assert result.entry.citation_key == "beta"
    assert result.alternates[0].entry.citation_key == "alpha"
|
|
|
|
|
|
def test_verifier_llm_cannot_create_exact_without_verified_doi():
    """LLM query expansion must not yield an ``exact`` status for a DOI-less hit.

    The fake LLM supplies title, author and year that match the only Crossref
    candidate, but that candidate has no ``doi`` field.
    """

    class _FakeLlmClient:
        def analyze_query(self, config, query, context):
            expansion = {
                "title": "Resolved Work",
                "authors": ["Smith"],
                "year": "2024",
                "venue": None,
                "keywords": [],
            }
            return expansion

        def rerank_candidates(self, config, query_fields, context, candidates):
            return None

    def _crossref_candidates(title, limit=5):
        candidate = BibEntry(
            entry_type="article",
            citation_key="candidate",
            fields={"author": "Smith, Jane", "title": "Resolved Work", "year": "2024"},
        )
        return [candidate]

    def _no_candidates(title, limit=5):
        return []

    llm_config = VerificationLlmConfig(
        base_url="http://localhost:11434",
        model="qwen",
        role="expand",
    )
    verifier = BibliographyVerifier(llm_config=llm_config, llm_client=_FakeLlmClient())
    verifier.resolver.search_crossref = _crossref_candidates  # type: ignore[method-assign]
    verifier.resolver.search_openalex = _no_candidates  # type: ignore[method-assign]
    verifier.resolver.search_datacite = _no_candidates  # type: ignore[method-assign]
    verifier.resolver.search_pubmed = _no_candidates  # type: ignore[method-assign]

    result = verifier.verify_string("Smith 2024", context="citation graphs")

    assert result.status != "exact"
|
|
|
|
|
|
def test_llm_json_loader_accepts_fenced_payload():
    """``_loads_lenient_json`` parses JSON wrapped in a ```json code fence."""
    json_body = '{"title":"Resolved Work","authors":["Smith"],"keywords":["graphs"]}'
    # Same bytes as the original literal: opening fence, body, closing fence.
    payload = "```json\n" + json_body + "\n```"

    parsed = _loads_lenient_json(payload)

    assert parsed["title"] == "Resolved Work"
|