CiteGeist/tests/test_topic_expand.py

243 lines
7.3 KiB
Python

from citegeist.bibtex import BibEntry
from citegeist.expand import (
ExpansionResult,
TopicExpander,
_meets_topic_assignment_threshold,
_topic_relevance_score,
)
from citegeist.storage import BibliographyStore
class FakeOpenAlexExpander:
    """Test double that replays canned expansion results.

    ``results`` is either a single list, returned for every citation key,
    or a dict mapping a citation key to the list returned for that key.
    """

    def __init__(self, results: list[ExpansionResult] | dict[str, list[ExpansionResult]]) -> None:
        self.results = results

    def expand_entry(self, store, citation_key, relation_type="cites", limit=25):
        """Return a new list holding the canned results for ``citation_key``.

        ``store``, ``relation_type`` and ``limit`` are accepted but not used.
        Keys missing from a dict of canned results yield an empty list.
        """
        canned = self.results
        if not isinstance(canned, dict):
            # One shared list: every key gets the same results.
            return list(canned)
        return list(canned.get(citation_key, []))
def test_topic_expander_assigns_relevant_discoveries_back_to_topic():
    """Topic expansion tags the on-topic discovery and leaves the other untagged.

    One seed entry is attached to the "abiogenesis" topic, and two draft
    entries are stored as if found by graph expansion. The fake expander
    reports both as cited by the seed. The test expects only the entry whose
    text mentions abiogenesis to be assigned to the topic.
    """
    seed_bibtex = """
@article{seed2024,
author = {Seed, Alice},
title = {Abiogenesis Seed Paper},
year = {2024}
}
"""
    # (citation key, title, abstract) of the pre-stored discoveries, in insertion order.
    discoveries = (
        ("discovered1", "Abiogenesis and origin chemistry", "A study of abiogenesis pathways."),
        ("discovered2", "Galaxy formation dynamics", "Nothing about the topic."),
    )
    store = BibliographyStore()
    try:
        store.ingest_bibtex(seed_bibtex)
        store.add_entry_topic(
            "seed2024",
            topic_slug="abiogenesis",
            topic_name="Abiogenesis",
            source_type="talkorigins",
            source_url="https://example.org/topics/abiogenesis",
            source_label="seed",
        )
        for key, title, abstract in discoveries:
            draft = BibEntry(
                entry_type="article",
                citation_key=key,
                fields={
                    "title": title,
                    "abstract": abstract,
                    "year": "2025",
                },
            )
            store.upsert_entry(
                draft,
                source_type="graph_expand",
                source_label="test",
                review_status="draft",
            )
        store.connection.commit()

        canned = [
            ExpansionResult("seed2024", key, False, "cites", "openalex:cites:seed2024")
            for key in ("discovered1", "discovered2")
        ]
        expander = TopicExpander(openalex_expander=FakeOpenAlexExpander(canned))
        results = expander.expand_topic(
            store,
            "abiogenesis",
            topic_phrase="abiogenesis origin chemistry",
            min_relevance=0.34,
        )

        assert len(results) == 2
        assigned_by_key = {
            outcome.discovered_citation_key: outcome.assigned_to_topic
            for outcome in results
        }
        assert assigned_by_key["discovered1"] is True
        assert assigned_by_key["discovered2"] is False

        # The assignment must also be visible in the store, not just in the results.
        relevant_topics = store.get_entry_topics("discovered1")
        assert relevant_topics[0]["slug"] == "abiogenesis"
        assert store.get_entry_topics("discovered2") == []
    finally:
        store.close()
def test_topic_expander_can_restrict_to_allowed_seed_keys():
    """``seed_keys`` limits which topic entries are used as expansion seeds.

    Both seeds belong to the topic, but the fake expander only has results
    for ``seed2023``. With ``seed_keys=["seed2024"]`` the test expects no
    results and no topic assignment for the stored discovery.
    """
    seeds_bibtex = """
@article{seed2024,
author = {Seed, Alice},
title = {Abiogenesis Seed Paper},
year = {2024}
}
@article{seed2023,
author = {Seed, Bob},
title = {Abiogenesis Historical Seed},
year = {2023}
}
"""
    store = BibliographyStore()
    try:
        store.ingest_bibtex(seeds_bibtex)
        for seed_key in ("seed2024", "seed2023"):
            store.add_entry_topic(
                seed_key,
                topic_slug="abiogenesis",
                topic_name="Abiogenesis",
                source_type="talkorigins",
                source_url="https://example.org/topics/abiogenesis",
                source_label="seed",
            )

        discovered = BibEntry(
            entry_type="article",
            citation_key="discovered1",
            fields={
                "title": "Abiogenesis origin chemistry",
                "abstract": "A study of abiogenesis chemistry.",
                "year": "2025",
            },
        )
        store.upsert_entry(
            discovered,
            source_type="graph_expand",
            source_label="test",
            review_status="draft",
        )
        store.connection.commit()

        # Only the seed that is NOT in seed_keys has anything to report.
        canned_by_seed = {
            "seed2023": [
                ExpansionResult("seed2023", "discovered1", False, "cites", "openalex:cites:seed2023"),
            ],
        }
        expander = TopicExpander(openalex_expander=FakeOpenAlexExpander(canned_by_seed))
        results = expander.expand_topic(
            store,
            "abiogenesis",
            topic_phrase="abiogenesis origin chemistry",
            seed_keys=["seed2024"],
        )

        assert results == []
        assert store.get_entry_topics("discovered1") == []
    finally:
        store.close()
def test_topic_expander_preview_discovers_without_writing():
    """``preview_only=True`` reports a discovery but stores nothing.

    ``_preview_discoveries`` is replaced with a stub that yields one
    (result, fields) pair. The test expects that discovery to be returned as
    meeting the relevance threshold and flagged as a created entry, while the
    store has neither the entry nor any topic assignment for it.
    """
    seed_bibtex = """
@article{seed2024,
author = {Seed, Alice},
title = {Abiogenesis Seed Paper},
year = {2024}
}
"""

    def fake_preview_discoveries(*_args, **_kwargs):
        # Build the pair anew on every call, as the original inline stub did.
        preview_result = ExpansionResult(
            "seed2024",
            "preview1",
            True,
            "cites",
            "openalex:cites:seed2024",
        )
        preview_fields = {
            "title": "Abiogenesis origin chemistry",
            "abstract": "A study of abiogenesis chemistry.",
            "year": "2025",
        }
        return [(preview_result, preview_fields)]

    store = BibliographyStore()
    try:
        store.ingest_bibtex(seed_bibtex)
        store.add_entry_topic(
            "seed2024",
            topic_slug="abiogenesis",
            topic_name="Abiogenesis",
            source_type="talkorigins",
            source_url="https://example.org/topics/abiogenesis",
            source_label="seed",
        )
        store.connection.commit()

        expander = TopicExpander()
        expander._preview_discoveries = fake_preview_discoveries  # type: ignore[method-assign]
        results = expander.expand_topic(
            store,
            "abiogenesis",
            topic_phrase="abiogenesis origin chemistry",
            min_relevance=0.3,
            preview_only=True,
        )

        assert len(results) == 1
        preview = results[0]
        assert preview.discovered_citation_key == "preview1"
        assert preview.meets_relevance_threshold is True
        assert preview.assigned_to_topic is False
        assert preview.created_entry is True

        # Nothing may have been persisted for the previewed key.
        assert store.get_entry("preview1") is None
        assert store.get_entry_topics("preview1") == []
    finally:
        store.close()
def test_topic_relevance_score_expands_human_evolution_terms():
    """An entry using related vocabulary scores at least 0.15 for "human evolution".

    Neither the title nor the abstract contains the words "human" or
    "evolution"; they use terms such as "hominids", "fossil" and "primate".
    """
    entry_fields = {
        "title": "Body size and proportions in early hominids",
        "abstract": "A fossil and paleolithic perspective on primate ancestry.",
        "journal": "Science",
    }
    relevance = _topic_relevance_score("human evolution", entry_fields)
    assert relevance >= 0.15
def test_topic_assignment_requires_title_anchor():
    """A sufficient relevance score alone does not qualify an entry for assignment.

    The entry scores at least 0.15 for "human evolution", yet the assignment
    check must return False at ``min_relevance=0.15``. The topic words appear
    only in the abstract, not in the title; going by the test name, the
    missing title match is presumably why the entry is rejected.
    """
    topic = "human evolution"
    entry = {
        "title": "Phylogenies and the Comparative Method",
        "abstract": "A comparative framework for primate and hominid evolution.",
        "journal": "Systematic Zoology",
    }
    score = _topic_relevance_score(topic, entry)
    assert score >= 0.15

    qualifies = _meets_topic_assignment_threshold(
        topic,
        entry,
        min_relevance=0.15,
        relevance_score=score,
    )
    assert qualifies is False