# CiteGeist/tests/test_topic_expand.py
#
# 340 lines
# 10 KiB
# Python

from citegeist.bibtex import BibEntry
from citegeist.expand import (
ExpansionResult,
TopicExpander,
_meets_topic_assignment_threshold,
_topic_relevance_score,
)
from citegeist.storage import BibliographyStore
class FakeOpenAlexExpander:
    """Stand-in expander that returns canned results and records its calls.

    The tests in this module pass an instance to ``TopicExpander`` as
    ``openalex_expander`` and later inspect ``calls``.
    """

    def __init__(self, results: list[ExpansionResult] | dict[str, list[ExpansionResult]]) -> None:
        # Either one list handed back for every key, or a mapping from
        # citation key to the list handed back for that key.
        self.results = results
        # One (citation_key, relation_type, limit) tuple per expand_entry call.
        self.calls: list[tuple[str, str, int]] = []

    def expand_entry(self, store, citation_key, relation_type="cites", limit=25):
        """Log the call and return a new list holding the canned results.

        ``store`` is accepted but never read. When the canned results are a
        dict, keys that are missing from it yield an empty list.
        """
        self.calls.append((citation_key, relation_type, limit))
        canned = self.results
        if not isinstance(canned, dict):
            return list(canned)
        return list(canned.get(citation_key, []))
def test_topic_expander_assigns_relevant_discoveries_back_to_topic():
    """Expanding a topic assigns ``discovered1`` to it and leaves ``discovered2`` unassigned."""
    seed_bibtex = (
        "\n"
        "@article{seed2024,\n"
        "author = {Seed, Alice},\n"
        "title = {Abiogenesis Seed Paper},\n"
        "year = {2024}\n"
        "}\n"
    )
    # (citation_key, title, abstract) for the entries the fake expander reports.
    discoveries = (
        ("discovered1", "Abiogenesis and origin chemistry", "A study of abiogenesis pathways."),
        ("discovered2", "Galaxy formation dynamics", "Nothing about the topic."),
    )

    store = BibliographyStore()
    try:
        store.ingest_bibtex(seed_bibtex)
        store.add_entry_topic(
            "seed2024",
            topic_slug="abiogenesis",
            topic_name="Abiogenesis",
            source_type="talkorigins",
            source_url="https://example.org/topics/abiogenesis",
            source_label="seed",
        )
        for key, title, abstract in discoveries:
            draft = BibEntry(
                entry_type="article",
                citation_key=key,
                fields={"title": title, "abstract": abstract, "year": "2025"},
            )
            store.upsert_entry(
                draft,
                source_type="graph_expand",
                source_label="test",
                review_status="draft",
            )
        store.connection.commit()

        canned = [
            ExpansionResult("seed2024", key, False, "cites", "openalex:cites:seed2024")
            for key, _title, _abstract in discoveries
        ]
        expander = TopicExpander(openalex_expander=FakeOpenAlexExpander(canned))

        results = expander.expand_topic(
            store,
            "abiogenesis",
            topic_phrase="abiogenesis origin chemistry",
            min_relevance=0.34,
        )

        assert len(results) == 2
        assigned = {}
        for item in results:
            assigned[item.discovered_citation_key] = item.assigned_to_topic
        assert assigned["discovered1"] is True
        assert assigned["discovered2"] is False

        # The stored topic links must agree with the flags on the results.
        assert store.get_entry_topics("discovered1")[0]["slug"] == "abiogenesis"
        assert store.get_entry_topics("discovered2") == []
    finally:
        store.close()
def test_topic_expander_can_restrict_to_allowed_seed_keys():
    """With ``seed_keys=["seed2024"]``, results canned for ``seed2023`` never surface."""
    seed_bibtex = (
        "\n"
        "@article{seed2024,\n"
        "author = {Seed, Alice},\n"
        "title = {Abiogenesis Seed Paper},\n"
        "year = {2024}\n"
        "}\n"
        "@article{seed2023,\n"
        "author = {Seed, Bob},\n"
        "title = {Abiogenesis Historical Seed},\n"
        "year = {2023}\n"
        "}\n"
    )

    store = BibliographyStore()
    try:
        store.ingest_bibtex(seed_bibtex)
        # Both seeds belong to the topic; only one is allowed in the call below.
        for seed_key in ("seed2024", "seed2023"):
            store.add_entry_topic(
                seed_key,
                topic_slug="abiogenesis",
                topic_name="Abiogenesis",
                source_type="talkorigins",
                source_url="https://example.org/topics/abiogenesis",
                source_label="seed",
            )

        discovered = BibEntry(
            entry_type="article",
            citation_key="discovered1",
            fields={
                "title": "Abiogenesis origin chemistry",
                "abstract": "A study of abiogenesis chemistry.",
                "year": "2025",
            },
        )
        store.upsert_entry(
            discovered,
            source_type="graph_expand",
            source_label="test",
            review_status="draft",
        )
        store.connection.commit()

        # The fake only has results for the seed that is excluded below.
        canned_by_seed = {
            "seed2023": [ExpansionResult("seed2023", "discovered1", False, "cites", "openalex:cites:seed2023")]
        }
        expander = TopicExpander(openalex_expander=FakeOpenAlexExpander(canned_by_seed))

        results = expander.expand_topic(
            store,
            "abiogenesis",
            topic_phrase="abiogenesis origin chemistry",
            seed_keys=["seed2024"],
        )

        assert results == []
        assert store.get_entry_topics("discovered1") == []
    finally:
        store.close()
def test_topic_expander_preview_discovers_without_writing():
    """``preview_only=True`` reports a discovery but stores no entry and no topic link."""
    seed_bibtex = (
        "\n"
        "@article{seed2024,\n"
        "author = {Seed, Alice},\n"
        "title = {Abiogenesis Seed Paper},\n"
        "year = {2024}\n"
        "}\n"
    )

    def _fake_preview(*_args, **_kwargs):
        # Builds a fresh (ExpansionResult, fields) list on every call.
        discovery = ExpansionResult(
            "seed2024",
            "preview1",
            True,
            "cites",
            "openalex:cites:seed2024",
        )
        fields = {
            "title": "Abiogenesis origin chemistry",
            "abstract": "A study of abiogenesis chemistry.",
            "year": "2025",
        }
        return [(discovery, fields)]

    store = BibliographyStore()
    try:
        store.ingest_bibtex(seed_bibtex)
        store.add_entry_topic(
            "seed2024",
            topic_slug="abiogenesis",
            topic_name="Abiogenesis",
            source_type="talkorigins",
            source_url="https://example.org/topics/abiogenesis",
            source_label="seed",
        )
        store.connection.commit()

        expander = TopicExpander()
        # Replace the preview hook on this instance with the canned one above.
        expander._preview_discoveries = _fake_preview  # type: ignore[method-assign]

        results = expander.expand_topic(
            store,
            "abiogenesis",
            topic_phrase="abiogenesis origin chemistry",
            min_relevance=0.3,
            preview_only=True,
        )

        assert len(results) == 1
        preview = results[0]
        assert preview.discovered_citation_key == "preview1"
        assert preview.meets_relevance_threshold is True
        assert preview.assigned_to_topic is False
        assert preview.created_entry is True

        # Nothing about the previewed entry may have reached the store.
        assert store.get_entry("preview1") is None
        assert store.get_entry_topics("preview1") == []
    finally:
        store.close()
def test_topic_expander_relation_type_both_uses_both_openalex_directions():
    """``relation_type="both"`` calls the expander with "cites" and then "cited_by"."""
    seed_bibtex = (
        "\n"
        "@article{seed2024,\n"
        "author = {Seed, Alice},\n"
        "title = {Abiogenesis Seed Paper},\n"
        "year = {2024}\n"
        "}\n"
    )

    store = BibliographyStore()
    try:
        store.ingest_bibtex(seed_bibtex)
        store.add_entry_topic(
            "seed2024",
            topic_slug="abiogenesis",
            topic_name="Abiogenesis",
            source_type="talkorigins",
            source_url="https://example.org/topics/abiogenesis",
            source_label="seed",
        )

        # No canned results are needed; only the recorded calls matter here.
        recorder = FakeOpenAlexExpander([])
        TopicExpander(openalex_expander=recorder).expand_topic(store, "abiogenesis", relation_type="both")

        requested_relations = [relation for _seed, relation, _limit in recorder.calls]
        assert requested_relations == ["cites", "cited_by"]
    finally:
        store.close()
def test_topic_expander_stops_once_recent_target_is_reached():
    """With ``target_recent_entries=1``, only the seed is expanded and only ``recent1`` is returned."""
    seed_bibtex = (
        "\n"
        "@article{seed2024,\n"
        "author = {Seed, Alice},\n"
        "title = {Abiogenesis Seed Paper},\n"
        "year = {2024}\n"
        "}\n"
    )
    # (citation_key, title, year) for the two pre-stored draft entries.
    recent_entries = (
        ("recent1", "Abiogenesis pathways", "2026"),
        ("recent2", "Abiogenesis chemistry", "2025"),
    )

    store = BibliographyStore()
    try:
        store.ingest_bibtex(seed_bibtex)
        store.add_entry_topic(
            "seed2024",
            topic_slug="abiogenesis",
            topic_name="Abiogenesis",
            source_type="talkorigins",
            source_url="https://example.org/topics/abiogenesis",
            source_label="seed",
        )
        for key, title, year in recent_entries:
            draft = BibEntry(
                entry_type="article",
                citation_key=key,
                fields={"title": title, "abstract": "abiogenesis", "year": year},
            )
            store.upsert_entry(
                draft,
                source_type="graph_expand",
                source_label="test",
                review_status="draft",
            )
        store.connection.commit()

        # recent1 has its own canned result, so expanding it would surface
        # recent2 and show up as a second recorded call.
        canned_by_seed = {
            "seed2024": [ExpansionResult("seed2024", "recent1", False, "cites", "openalex:cites:seed2024")],
            "recent1": [ExpansionResult("recent1", "recent2", False, "cites", "openalex:cites:recent1")],
        }
        fake_expander = FakeOpenAlexExpander(canned_by_seed)
        expander = TopicExpander(openalex_expander=fake_expander)

        results = expander.expand_topic(
            store,
            "abiogenesis",
            topic_phrase="abiogenesis chemistry",
            max_rounds=3,
            recent_years=2,
            target_recent_entries=1,
        )

        discovered_keys = [item.discovered_citation_key for item in results]
        assert discovered_keys == ["recent1"]
        assert fake_expander.calls == [("seed2024", "cites", 25)]
    finally:
        store.close()
def test_topic_relevance_score_expands_human_evolution_terms():
    """An entry that never says "human evolution" verbatim still scores at least 0.15."""
    entry = {
        "title": "Body size and proportions in early hominids",
        "abstract": "A fossil and paleolithic perspective on primate ancestry.",
        "journal": "Science",
    }

    score = _topic_relevance_score("human evolution", entry)

    assert score >= 0.15
def test_topic_assignment_requires_title_anchor():
    """A score of at least 0.15 is not sufficient for assignment for this entry.

    Going by the test name, the rejection is expected because the title
    carries no topic anchor; the relevant terms appear only in the abstract.
    """
    topic = "human evolution"
    entry = {
        "title": "Phylogenies and the Comparative Method",
        "abstract": "A comparative framework for primate and hominid evolution.",
        "journal": "Systematic Zoology",
    }

    score = _topic_relevance_score(topic, entry)
    assert score >= 0.15

    assignable = _meets_topic_assignment_threshold(topic, entry, min_relevance=0.15, relevance_score=score)
    assert assignable is False