"""Tests for ``Bootstrapper.bootstrap`` and the ``bootstrap`` CLI sub-command.

All resolver searches and citation expanders are replaced with in-process
stubs, so the tests exercise only the bootstrap logic and the store.
"""

from contextlib import closing
from unittest.mock import patch

from citegeist import BibEntry, BibliographyStore
from citegeist.bootstrap import Bootstrapper, BootstrapResult
from citegeist.cli import main

# Minimal one-entry BibTeX document shared by the seed-based tests.
_SEED_BIBTEX = """
@article{seed2024,
  author = {Seed, Alice},
  title = {Seed Paper},
  year = {2024}
}
"""


def _no_results(topic, limit=5):
    """Stand-in for a resolver search method that returns no candidates."""
    return []


def _disable_expansion(bootstrapper: Bootstrapper) -> None:
    """Replace both citation expanders on *bootstrapper* with empty stubs.

    The stubs keep the parameter names of the lambdas they replace so that
    keyword calls (``relation_type=...``, ``limit=...``) keep working.
    """
    bootstrapper.crossref_expander.expand_entry_references = lambda _store, _key: []  # type: ignore[method-assign]
    bootstrapper.openalex_expander.expand_entry = lambda _store, _key, relation_type="cites", limit=5: []  # type: ignore[method-assign]


def test_bootstrap_from_seed_bib_only():
    """A seed BibTeX string alone yields, and stores, exactly the seed entry."""
    with closing(BibliographyStore()) as store:
        bootstrapper = Bootstrapper()
        _disable_expansion(bootstrapper)

        results = bootstrapper.bootstrap(
            store,
            seed_bibtex=_SEED_BIBTEX,
            expand=False,
        )

        assert [item.citation_key for item in results] == ["seed2024"]
        assert store.get_entry("seed2024") is not None


def test_bootstrap_from_topic_only():
    """A topic alone stores the single search hit and gives it a positive score."""
    with closing(BibliographyStore()) as store:
        bootstrapper = Bootstrapper()
        bootstrapper.resolver.search_openalex = _no_results  # type: ignore[method-assign]
        bootstrapper.resolver.search_crossref = _no_results  # type: ignore[method-assign]
        bootstrapper.resolver.search_datacite = lambda topic, limit=5: [  # type: ignore[method-assign]
            BibEntry(
                entry_type="article",
                citation_key="topic2024graph",
                fields={"title": "Graph Topic Result", "year": "2024"},
            )
        ]
        _disable_expansion(bootstrapper)

        results = bootstrapper.bootstrap(store, topic="graph topic", expand=False)

        assert [item.citation_key for item in results] == ["topic2024graph"]
        assert store.get_entry("topic2024graph") is not None
        assert results[0].score > 0


def test_bootstrap_cli_accepts_seed_and_topic(tmp_path):
    """The CLI exits with 0 when given both ``--seed-bib`` and ``--topic``."""
    seed_bib = tmp_path / "seed.bib"
    seed_bib.write_text(_SEED_BIBTEX, encoding="utf-8")
    database = tmp_path / "library.sqlite3"

    # Bootstrapper.bootstrap is mocked out: only argument handling is under test.
    with patch("citegeist.cli.Bootstrapper.bootstrap") as mocked_bootstrap:
        mocked_bootstrap.return_value = []
        exit_code = main(
            [
                "--db",
                str(database),
                "bootstrap",
                "--seed-bib",
                str(seed_bib),
                "--topic",
                "graph topic",
                "--no-expand",
            ]
        )

    assert exit_code == 0


def test_bootstrap_cli_preview_outputs_candidate_metadata(tmp_path, capsys):
    """``--preview`` prints the title, author and abstract of each candidate."""
    database = tmp_path / "library.sqlite3"

    with patch("citegeist.cli.Bootstrapper.bootstrap") as mocked_bootstrap:
        mocked_bootstrap.return_value = [
            BootstrapResult(
                citation_key="openalexw123",
                origin="topic",
                created=True,
                score=4.0,
                title="Artificial Life and Adaptive Behavior",
                author="Langton, Christopher G.",
                year="1989",
                abstract="A foundational overview of artificial life systems.",
            )
        ]
        exit_code = main(
            [
                "--db",
                str(database),
                "bootstrap",
                "--topic",
                "artificial life",
                "--preview",
                "--topic-commit-limit",
                "50",
            ]
        )

    assert exit_code == 0
    payload = capsys.readouterr().out
    assert "Artificial Life and Adaptive Behavior" in payload
    assert "Langton, Christopher G." in payload
    assert "A foundational overview of artificial life systems." in payload


def test_bootstrap_ranks_and_deduplicates_topic_candidates():
    """Of the stubbed hits, only the key returned by both sources is a topic result.

    ``shared2024graph`` is returned by two sources and must appear once;
    ``crossref2024other`` shares no word with the topic and must not appear.
    """
    with closing(BibliographyStore()) as store:
        bootstrapper = Bootstrapper()
        bootstrapper.resolver.search_openalex = lambda topic, limit=5: [  # type: ignore[method-assign]
            BibEntry(
                entry_type="article",
                citation_key="shared2024graph",
                fields={"title": "Graph Topic Ranking", "abstract": "graph topic graph"},
            )
        ]
        bootstrapper.resolver.search_crossref = lambda topic, limit=5: [  # type: ignore[method-assign]
            BibEntry(
                entry_type="article",
                citation_key="shared2024graph",
                fields={"title": "Graph Topic Ranking", "abstract": "graph"},
            ),
            BibEntry(
                entry_type="article",
                citation_key="crossref2024other",
                fields={"title": "Less relevant paper"},
            ),
        ]
        bootstrapper.resolver.search_datacite = _no_results  # type: ignore[method-assign]
        _disable_expansion(bootstrapper)

        results = bootstrapper.bootstrap(store, topic="graph topic", expand=False, topic_limit=5)

        topic_results = [item for item in results if item.origin == "topic"]
        assert [item.citation_key for item in topic_results] == ["shared2024graph"]


def test_bootstrap_preview_does_not_write_to_database():
    """``preview_only=True`` returns candidates without storing them."""
    with closing(BibliographyStore()) as store:
        bootstrapper = Bootstrapper()
        bootstrapper.resolver.search_openalex = lambda topic, limit=5: [  # type: ignore[method-assign]
            BibEntry(entry_type="article", citation_key="preview2024graph", fields={"title": "Preview Graph Topic"})
        ]
        bootstrapper.resolver.search_crossref = _no_results  # type: ignore[method-assign]
        bootstrapper.resolver.search_datacite = _no_results  # type: ignore[method-assign]

        results = bootstrapper.bootstrap(store, topic="graph topic", expand=False, preview_only=True)

        assert [item.citation_key for item in results] == ["preview2024graph"]
        assert results[0].title == "Preview Graph Topic"
        assert store.get_entry("preview2024graph") is None


def test_bootstrap_topic_commit_limit_restricts_persisted_candidates():
    """``topic_commit_limit=1`` returns and stores only the first of two hits."""
    with closing(BibliographyStore()) as store:
        bootstrapper = Bootstrapper()
        bootstrapper.resolver.search_openalex = lambda topic, limit=5: [  # type: ignore[method-assign]
            BibEntry(entry_type="article", citation_key="rank1", fields={"title": "Graph Topic One"}),
            BibEntry(entry_type="article", citation_key="rank2", fields={"title": "Graph Topic Two"}),
        ]
        bootstrapper.resolver.search_crossref = _no_results  # type: ignore[method-assign]
        bootstrapper.resolver.search_datacite = _no_results  # type: ignore[method-assign]
        _disable_expansion(bootstrapper)

        results = bootstrapper.bootstrap(
            store,
            topic="graph topic",
            expand=False,
            topic_limit=5,
            topic_commit_limit=1,
        )

        assert [item.citation_key for item in results if item.origin == "topic"] == ["rank1"]
        assert store.get_entry("rank1") is not None
        assert store.get_entry("rank2") is None


def test_bootstrap_topic_candidates_are_attached_to_topic():
    """A committed candidate is linked to the named topic with label and confidence."""
    with closing(BibliographyStore()) as store:
        bootstrapper = Bootstrapper()
        bootstrapper.resolver.search_openalex = lambda topic, limit=5: [  # type: ignore[method-assign]
            BibEntry(
                entry_type="article",
                citation_key="topic2024graph",
                fields={"title": "Graph Topic Result", "year": "2024"},
            )
        ]
        bootstrapper.resolver.search_crossref = _no_results  # type: ignore[method-assign]
        bootstrapper.resolver.search_datacite = _no_results  # type: ignore[method-assign]
        _disable_expansion(bootstrapper)

        bootstrapper.bootstrap(
            store,
            topic="graph topic",
            topic_slug="graph-topic",
            topic_name="Graph Topic",
            topic_phrase="graph topic methods",
            expand=False,
            topic_commit_limit=1,
        )

        topic = store.get_topic("graph-topic")
        assert topic is not None
        assert topic["entry_count"] == 1

        topic_entries = store.list_topic_entries("graph-topic")
        assert [item["citation_key"] for item in topic_entries] == ["topic2024graph"]
        assert topic_entries[0]["source_label"] == "topic:graph topic"
        assert topic_entries[0]["confidence"] > 0


def test_bootstrap_topic_commit_requires_title_anchor():
    """Only the candidate whose title contains topic terms is committed.

    ``broad2024`` mentions a topic term in its abstract only and must be
    neither returned nor stored; ``anchored2024`` has topic terms in its title.
    """
    with closing(BibliographyStore()) as store:
        bootstrapper = Bootstrapper()
        bootstrapper.resolver.search_openalex = lambda topic, limit=5: [  # type: ignore[method-assign]
            BibEntry(
                entry_type="article",
                citation_key="broad2024",
                fields={
                    "title": "The phylum Vertebrata: a case for zoological recognition",
                    "abstract": "Chordata includes Cephalochordata and Urochordata.",
                    "year": "2024",
                },
            ),
            BibEntry(
                entry_type="article",
                citation_key="anchored2024",
                fields={
                    "title": "Acraniates and amphioxus in comparative development",
                    "year": "2024",
                },
            ),
        ]
        bootstrapper.resolver.search_crossref = _no_results  # type: ignore[method-assign]
        bootstrapper.resolver.search_datacite = _no_results  # type: ignore[method-assign]
        _disable_expansion(bootstrapper)

        results = bootstrapper.bootstrap(
            store,
            topic="acraniates cephalochordata amphioxus lancelet",
            topic_slug="acraniates",
            topic_name="Acraniates",
            expand=False,
            topic_commit_limit=5,
        )

        assert [item.citation_key for item in results] == ["anchored2024"]
        topic_entries = store.list_topic_entries("acraniates")
        assert [item["citation_key"] for item in topic_entries] == ["anchored2024"]
        assert store.get_entry("broad2024") is None


def test_bootstrap_preview_uses_topic_commit_limit_when_larger_than_topic_limit():
    """In preview mode a larger ``topic_commit_limit`` widens the result list.

    With ``topic_limit=5`` and ``topic_commit_limit=7`` all seven stubbed
    candidates are expected, with author, year and abstract carried through.
    """
    with closing(BibliographyStore()) as store:
        bootstrapper = Bootstrapper()
        # The stub honours ``limit`` so the test can observe which limit is passed.
        bootstrapper.resolver.search_openalex = lambda topic, limit=5: [  # type: ignore[method-assign]
            BibEntry(
                entry_type="article",
                citation_key=f"rank{index}",
                fields={
                    "title": f"Preview Topic Result {index}",
                    "author": f"Author, {index}",
                    "year": f"20{index:02d}",
                    "abstract": f"Abstract {index}",
                },
            )
            for index in range(1, 8)
        ][:limit]
        bootstrapper.resolver.search_crossref = _no_results  # type: ignore[method-assign]
        bootstrapper.resolver.search_datacite = _no_results  # type: ignore[method-assign]

        results = bootstrapper.bootstrap(
            store,
            topic="graph topic",
            expand=False,
            preview_only=True,
            topic_limit=5,
            topic_commit_limit=7,
        )

        assert [item.citation_key for item in results] == [
            "rank1",
            "rank2",
            "rank3",
            "rank4",
            "rank5",
            "rank6",
            "rank7",
        ]
        assert results[0].author == "Author, 1"
        assert results[0].year == "2001"
        assert results[0].abstract == "Abstract 1"