CiteGeist/tests/test_batch.py

130 lines
3.9 KiB
Python

from pathlib import Path
from citegeist.batch import BatchBootstrapRunner, load_batch_jobs
from citegeist.cli import main
from citegeist.storage import BibliographyStore
def test_load_batch_jobs_accepts_object_with_jobs(tmp_path: Path):
    """Load a batch file whose top level is an object holding a ``jobs`` list.

    The jobs file is written into ``tmp_path``. The test checks that the first
    job keeps its name and that the second job's relative ``seed_bib`` value
    comes back as the resolved absolute path of ``tmp_path / "seed.bib"``.
    """
    payload = """
{
"jobs": [
{"name": "topic-only", "topic": "graph topic"},
{"name": "seed-only", "seed_bib": "seed.bib"}
]
}
"""
    jobs_file = tmp_path / "jobs.json"
    jobs_file.write_text(payload, encoding="utf-8")

    loaded = load_batch_jobs(jobs_file)
    topic_job = loaded[0]
    seed_job = loaded[1]

    expected_seed_path = str(tmp_path.joinpath("seed.bib").resolve())
    assert topic_job["name"] == "topic-only"
    assert seed_job["seed_bib"] == expected_seed_path
def test_batch_runner_executes_multiple_jobs(tmp_path: Path):
    """Run a seed-file job and a preview topic job through one runner.

    The resolver's OpenAlex, Crossref and DataCite search callables are
    replaced with local stubs. The test checks that both jobs are reported in
    order, that the seed entry is stored, and that the topic result of the
    ``preview`` job is returned but not written to the store.
    """
    from citegeist import BibEntry

    def fake_openalex(topic, limit=5):
        # Single canned hit standing in for an OpenAlex search result.
        return [
            BibEntry(entry_type="article", citation_key="topic2024graph", fields={"title": "Graph Topic Result"})
        ]

    def no_hits(topic, limit=5):
        # Stub for the sources that should contribute nothing.
        return []

    seed_file = tmp_path / "seed.bib"
    seed_text = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024}
}
"""
    seed_file.write_text(seed_text, encoding="utf-8")

    runner = BatchBootstrapRunner()
    resolver = runner.bootstrapper.resolver
    resolver.search_openalex = fake_openalex  # type: ignore[method-assign]
    resolver.search_crossref = no_hits  # type: ignore[method-assign]
    resolver.search_datacite = no_hits  # type: ignore[method-assign]

    seed_job = {"name": "seed-job", "seed_bib": str(seed_file), "expand": False}
    topic_job = {"name": "topic-job", "topic": "graph topic", "expand": False, "preview": True}
    jobs = [seed_job, topic_job]

    store = BibliographyStore()
    try:
        outcomes = runner.run(store, jobs)
        reported_names = [outcome.job_name for outcome in outcomes]
        assert reported_names == ["seed-job", "topic-job"]
        assert outcomes[0].result_count == 1
        assert outcomes[1].results[0].citation_key == "topic2024graph"
        assert store.get_entry("seed2024") is not None
        assert store.get_entry("topic2024graph") is None
    finally:
        store.close()
def test_batch_runner_can_store_topic_phrase_metadata():
    """Run a non-preview topic job and inspect the topic record it leaves.

    The job supplies ``topic_slug``, ``topic_name`` and ``topic_phrase``.
    After the run, the topic fetched by that slug must exist and expose the
    given name and the phrase under ``expansion_phrase``. The resolver's
    search callables are replaced with local stubs.
    """
    from citegeist import BibEntry

    def fake_openalex(topic, limit=5):
        # Single canned hit standing in for an OpenAlex search result.
        return [
            BibEntry(entry_type="article", citation_key="topic2024graph", fields={"title": "Graph Topic Result"})
        ]

    def no_hits(topic, limit=5):
        # Stub for the sources that should contribute nothing.
        return []

    runner = BatchBootstrapRunner()
    resolver = runner.bootstrapper.resolver
    resolver.search_openalex = fake_openalex  # type: ignore[method-assign]
    resolver.search_crossref = no_hits  # type: ignore[method-assign]
    resolver.search_datacite = no_hits  # type: ignore[method-assign]

    topic_job = dict(
        name="topic-job",
        topic="graph topic",
        topic_slug="graph-methods",
        topic_name="Graph Methods",
        topic_phrase="graph networks biology",
        expand=False,
        preview=False,
    )
    jobs = [topic_job]

    store = BibliographyStore()
    try:
        runner.run(store, jobs)
        stored_topic = store.get_topic("graph-methods")
        assert stored_topic is not None
        expected_fields = (
            ("name", "Graph Methods"),
            ("expansion_phrase", "graph networks biology"),
        )
        for field, expected in expected_fields:
            assert stored_topic[field] == expected
    finally:
        store.close()
def test_bootstrap_batch_cli_runs_json_jobs(tmp_path: Path):
    """Check that the ``bootstrap-batch`` CLI command accepts a JSON job list.

    A seed BibTeX file and a two-job batch file are written under
    ``tmp_path``. ``BatchBootstrapRunner.run`` is patched to return an empty
    list, so no job is actually executed. The test verifies that ``main``
    reaches the patched runner and returns exit code 0.
    """
    import json
    from unittest.mock import patch

    seed_bib = tmp_path / "seed.bib"
    seed_bib.write_text(
        """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024}
}
""",
        encoding="utf-8",
    )

    # Serialize with json.dumps instead of interpolating the path into a JSON
    # template: backslashes or quotes in the path (e.g. a Windows temp
    # directory) would otherwise yield invalid JSON escape sequences.
    batch_jobs = [
        {"name": "seed-job", "seed_bib": str(seed_bib), "expand": False},
        {"name": "topic-job", "topic": "graph topic", "expand": False, "preview": True},
    ]
    batch_json = tmp_path / "jobs.json"
    batch_json.write_text(json.dumps(batch_jobs, indent=2), encoding="utf-8")

    database = tmp_path / "library.sqlite3"
    with patch("citegeist.cli.BatchBootstrapRunner.run") as mocked_run:
        mocked_run.return_value = []
        exit_code = main(["--db", str(database), "bootstrap-batch", str(batch_json)])

    assert exit_code == 0
    # Guard against a vacuous pass: the command must have dispatched to the
    # (patched) runner rather than exiting early.
    assert mocked_run.called