# File listing metadata: 243 lines, 7.3 KiB, Python.
from citegeist.bibtex import BibEntry
|
|
from citegeist.expand import (
|
|
ExpansionResult,
|
|
TopicExpander,
|
|
_meets_topic_assignment_threshold,
|
|
_topic_relevance_score,
|
|
)
|
|
from citegeist.storage import BibliographyStore
|
|
|
|
|
|
class FakeOpenAlexExpander:
    """Stand-in handed to ``TopicExpander(openalex_expander=...)`` in these tests.

    ``results`` is either a flat list that is returned for every citation key,
    or a dict mapping a citation key to the list returned for that key.
    """

    def __init__(self, results: list[ExpansionResult] | dict[str, list[ExpansionResult]]) -> None:
        self.results = results

    def expand_entry(self, store, citation_key, relation_type="cites", limit=25):
        """Return a new list holding the canned results for ``citation_key``.

        ``store``, ``relation_type`` and ``limit`` are accepted but not used.
        A key missing from a dict of results yields an empty list.
        """
        canned = self.results
        if not isinstance(canned, dict):
            # Flat list: the same results are replayed for every seed.
            return list(canned)
        return list(canned.get(citation_key, []))
|
|
|
|
|
|
def test_topic_expander_assigns_relevant_discoveries_back_to_topic():
    """Both discoveries are reported, but only ``discovered1`` is assigned to the topic.

    ``discovered1`` shares wording with the topic phrase; ``discovered2`` does
    not, and must end up with no topics in the store.
    """
    seed_bibtex = """
@article{seed2024,
  author = {Seed, Alice},
  title = {Abiogenesis Seed Paper},
  year = {2024}
}
"""
    # (citation key, title, abstract) for the entries the fake expander "discovers".
    discoveries = (
        ("discovered1", "Abiogenesis and origin chemistry", "A study of abiogenesis pathways."),
        ("discovered2", "Galaxy formation dynamics", "Nothing about the topic."),
    )

    store = BibliographyStore()
    try:
        store.ingest_bibtex(seed_bibtex)
        store.add_entry_topic(
            "seed2024",
            topic_slug="abiogenesis",
            topic_name="Abiogenesis",
            source_type="talkorigins",
            source_url="https://example.org/topics/abiogenesis",
            source_label="seed",
        )
        for key, title, abstract in discoveries:
            discovered_entry = BibEntry(
                entry_type="article",
                citation_key=key,
                fields={
                    "title": title,
                    "abstract": abstract,
                    "year": "2025",
                },
            )
            store.upsert_entry(
                discovered_entry,
                source_type="graph_expand",
                source_label="test",
                review_status="draft",
            )
        store.connection.commit()

        canned_results = [
            ExpansionResult("seed2024", key, False, "cites", "openalex:cites:seed2024")
            for key, _title, _abstract in discoveries
        ]
        fake_openalex = FakeOpenAlexExpander(canned_results)
        expander = TopicExpander(openalex_expander=fake_openalex)

        results = expander.expand_topic(
            store,
            "abiogenesis",
            topic_phrase="abiogenesis origin chemistry",
            min_relevance=0.34,
        )

        assert len(results) == 2
        assigned = {item.discovered_citation_key: item.assigned_to_topic for item in results}
        assert assigned["discovered1"] is True
        assert assigned["discovered2"] is False

        # The assignment must also be visible in the store, not just in the results.
        topics = store.get_entry_topics("discovered1")
        first_topic = topics[0]
        assert first_topic["slug"] == "abiogenesis"
        assert store.get_entry_topics("discovered2") == []
    finally:
        store.close()
|
|
|
|
|
|
def test_topic_expander_can_restrict_to_allowed_seed_keys():
    """With ``seed_keys=["seed2024"]`` nothing canned for ``seed2023`` is returned.

    The fake only has results for ``seed2023``, so the expansion must come
    back empty and ``discovered1`` must stay without topics.
    """
    seeds_bibtex = """
@article{seed2024,
  author = {Seed, Alice},
  title = {Abiogenesis Seed Paper},
  year = {2024}
}

@article{seed2023,
  author = {Seed, Bob},
  title = {Abiogenesis Historical Seed},
  year = {2023}
}
"""
    store = BibliographyStore()
    try:
        store.ingest_bibtex(seeds_bibtex)
        # Both seeds belong to the topic; only one is allowed as a seed below.
        for citation_key in ("seed2024", "seed2023"):
            store.add_entry_topic(
                citation_key,
                topic_slug="abiogenesis",
                topic_name="Abiogenesis",
                source_type="talkorigins",
                source_url="https://example.org/topics/abiogenesis",
                source_label="seed",
            )

        discovered_entry = BibEntry(
            entry_type="article",
            citation_key="discovered1",
            fields={
                "title": "Abiogenesis origin chemistry",
                "abstract": "A study of abiogenesis chemistry.",
                "year": "2025",
            },
        )
        store.upsert_entry(
            discovered_entry,
            source_type="graph_expand",
            source_label="test",
            review_status="draft",
        )
        store.connection.commit()

        results_by_seed = {
            "seed2023": [
                ExpansionResult("seed2023", "discovered1", False, "cites", "openalex:cites:seed2023"),
            ],
        }
        expander = TopicExpander(openalex_expander=FakeOpenAlexExpander(results_by_seed))

        results = expander.expand_topic(
            store,
            "abiogenesis",
            topic_phrase="abiogenesis origin chemistry",
            seed_keys=["seed2024"],
        )

        assert results == []
        assert store.get_entry_topics("discovered1") == []
    finally:
        store.close()
|
|
|
|
|
|
def test_topic_expander_preview_discovers_without_writing():
    """``preview_only=True`` reports the discovery but leaves the store untouched.

    ``_preview_discoveries`` is replaced with a stub, and the test then checks
    that ``preview1`` appears in the results yet has no entry and no topics
    in the store.
    """
    seed_bibtex = """
@article{seed2024,
  author = {Seed, Alice},
  title = {Abiogenesis Seed Paper},
  year = {2024}
}
"""

    def stub_preview_discoveries(*_args, **_kwargs):
        # One (ExpansionResult, fields) pair, rebuilt on every call.
        preview_result = ExpansionResult(
            "seed2024",
            "preview1",
            True,
            "cites",
            "openalex:cites:seed2024",
        )
        preview_fields = {
            "title": "Abiogenesis origin chemistry",
            "abstract": "A study of abiogenesis chemistry.",
            "year": "2025",
        }
        return [(preview_result, preview_fields)]

    store = BibliographyStore()
    try:
        store.ingest_bibtex(seed_bibtex)
        store.add_entry_topic(
            "seed2024",
            topic_slug="abiogenesis",
            topic_name="Abiogenesis",
            source_type="talkorigins",
            source_url="https://example.org/topics/abiogenesis",
            source_label="seed",
        )
        store.connection.commit()

        expander = TopicExpander()
        expander._preview_discoveries = stub_preview_discoveries  # type: ignore[method-assign]

        results = expander.expand_topic(
            store,
            "abiogenesis",
            topic_phrase="abiogenesis origin chemistry",
            min_relevance=0.3,
            preview_only=True,
        )

        assert len(results) == 1
        preview = results[0]
        assert preview.discovered_citation_key == "preview1"
        assert preview.meets_relevance_threshold is True
        assert preview.assigned_to_topic is False
        assert preview.created_entry is True

        # Nothing may have been persisted for the previewed key.
        assert store.get_entry("preview1") is None
        assert store.get_entry_topics("preview1") == []
    finally:
        store.close()
|
|
|
|
|
|
def test_topic_relevance_score_expands_human_evolution_terms():
    """An entry that never says "human evolution" verbatim still scores at least 0.15."""
    hominid_entry = {
        "title": "Body size and proportions in early hominids",
        "abstract": "A fossil and paleolithic perspective on primate ancestry.",
        "journal": "Science",
    }

    score = _topic_relevance_score("human evolution", hominid_entry)

    assert score >= 0.15
|
|
|
|
|
|
def test_topic_assignment_requires_title_anchor():
    """A score at the threshold is not enough for assignment on its own.

    The entry scores at least 0.15 for "human evolution", yet the assignment
    check with ``min_relevance=0.15`` must still return ``False``.
    """
    topic_phrase = "human evolution"
    entry = {
        "title": "Phylogenies and the Comparative Method",
        "abstract": "A comparative framework for primate and hominid evolution.",
        "journal": "Systematic Zoology",
    }

    score = _topic_relevance_score(topic_phrase, entry)
    assert score >= 0.15

    meets_threshold = _meets_topic_assignment_threshold(
        topic_phrase,
        entry,
        min_relevance=0.15,
        relevance_score=score,
    )
    assert meets_threshold is False
|