# CiteGeist/tests/test_cli.py
#
# 242 lines
# 6.8 KiB
# Python

from __future__ import annotations
import json
import subprocess
import sys
from pathlib import Path
from unittest.mock import patch
from citegeist.cli import main
# Two-entry BibTeX fixture written to disk by the ingest tests below: an
# @article (smith2024graphs) whose ``references`` field names the second
# entry, an @inproceedings (miller2023search).
SAMPLE_BIB = """
@article{smith2024graphs,
author = {Smith, Jane and Doe, Alex},
title = {Graph-first bibliography augmentation},
year = {2024},
abstract = {We study citation graphs for literature discovery.},
references = {miller2023search}
}
@inproceedings{miller2023search,
author = {Miller, Sam},
title = {Semantic search for research corpora},
year = {2023},
abstract = {Dense retrieval improves recall for academic search.}
}
"""
def run_cli(tmp_path: Path, *args: str) -> subprocess.CompletedProcess[str]:
    """Run ``python -m citegeist`` in a subprocess against a per-test database.

    Args:
        tmp_path: Directory that holds the ``library.sqlite3`` database file
            passed to the CLI via ``--db``.
        *args: CLI arguments appended after ``--db <database>``.

    Returns:
        The completed process with stdout and stderr captured as text. A
        non-zero exit status does not raise (``check=False``); callers assert
        on ``returncode`` themselves.
    """
    import os  # local import so this helper does not depend on file-level edits

    repo_root = Path(__file__).resolve().parents[1]
    database = tmp_path / "library.sqlite3"

    # Extend the parent environment instead of replacing it: a child started
    # with only PYTHONPATH loses PATH/HOME/TEMP and, on Windows, SystemRoot,
    # which the interpreter needs in order to start at all.
    search_path = [str(repo_root / "src")]
    inherited = os.environ.get("PYTHONPATH")
    if inherited:
        # Keep the caller's entries, but let the checkout's ``src`` win.
        search_path.append(inherited)
    env = {**os.environ, "PYTHONPATH": os.pathsep.join(search_path)}

    return subprocess.run(
        [sys.executable, "-m", "citegeist", "--db", str(database), *args],
        cwd=repo_root,
        env=env,
        capture_output=True,
        text=True,
        check=False,
    )
def test_cli_ingest_show_search_and_export(tmp_path: Path):
    """Drive ingest, show, search and export through the CLI in sequence."""
    source_bib = tmp_path / "input.bib"
    source_bib.write_text(SAMPLE_BIB, encoding="utf-8")

    # Ingest: stdout must mention the first entry's citation key.
    ingest_run = run_cli(tmp_path, "ingest", str(source_bib))
    assert ingest_run.returncode == 0
    assert "smith2024graphs" in ingest_run.stdout

    # Show: stdout is parsed as JSON describing the requested entry.
    show_run = run_cli(tmp_path, "show", "smith2024graphs")
    assert show_run.returncode == 0
    record = json.loads(show_run.stdout)
    assert record["citation_key"] == "smith2024graphs"

    # Search: the query word appears only in the second fixture entry.
    search_run = run_cli(tmp_path, "search", "semantic")
    assert search_run.returncode == 0
    assert "miller2023search" in search_run.stdout

    # Export: the written file must contain the article entry header.
    export_target = tmp_path / "exported.bib"
    export_run = run_cli(tmp_path, "export", "--output", str(export_target))
    assert export_run.returncode == 0
    assert "@article{smith2024graphs," in export_target.read_text(encoding="utf-8")
def test_cli_provenance_and_status_updates(tmp_path: Path):
    """Check that ingest options surface in ``show --provenance`` and that ``set-status`` succeeds."""
    source_bib = tmp_path / "input.bib"
    source_bib.write_text(SAMPLE_BIB, encoding="utf-8")

    # Ingest with an explicit review status and source label.
    ingest_args = [
        "ingest",
        "--status",
        "draft",
        "--source-label",
        "tests/input.bib",
        str(source_bib),
    ]
    ingest_run = run_cli(tmp_path, *ingest_args)
    assert ingest_run.returncode == 0

    # Both values must come back in the JSON printed by ``show --provenance``.
    show_run = run_cli(tmp_path, "show", "--provenance", "smith2024graphs")
    assert show_run.returncode == 0
    record = json.loads(show_run.stdout)
    assert record["review_status"] == "draft"
    first_provenance = record["field_provenance"][0]
    assert first_provenance["source_label"] == "tests/input.bib"

    # Changing the status must echo the new value on stdout.
    status_run = run_cli(tmp_path, "set-status", "smith2024graphs", "reviewed")
    assert status_run.returncode == 0
    assert "reviewed" in status_run.stdout
def test_cli_resolve_updates_entry(tmp_path: Path):
    """Run ``resolve`` in-process with ``MetadataResolver.resolve_entry`` mocked.

    The seed entry is ingested through the subprocess helper; the resolve
    command is then invoked via ``main`` so the patch applies to the code
    under test. Only the exit code is asserted.
    """
    from citegeist.bibtex import BibEntry
    from citegeist.resolve import Resolution

    seed_bib = """
@article{smith2024graphs,
author = {Smith, Jane},
title = {Graph-first bibliography augmentation},
year = {2024},
doi = {10.1000/example-doi}
}
"""
    source_bib = tmp_path / "input.bib"
    source_bib.write_text(seed_bib, encoding="utf-8")

    ingest_run = run_cli(tmp_path, "ingest", str(source_bib))
    assert ingest_run.returncode == 0

    # Canned resolver answer: same paper, with a different key and an added journal.
    resolved_entry = BibEntry(
        entry_type="article",
        citation_key="resolvedkey",
        fields={
            "author": "Smith, Jane",
            "title": "Graph-first bibliography augmentation",
            "year": "2024",
            "doi": "10.1000/example-doi",
            "journal": "Journal of Graph Studies",
        },
    )
    canned_resolution = Resolution(
        entry=resolved_entry,
        source_type="resolver",
        source_label="crossref:doi:10.1000/example-doi",
    )

    database = tmp_path / "library.sqlite3"
    cli_args = ["--db", str(database), "resolve", "smith2024graphs"]
    with patch(
        "citegeist.cli.MetadataResolver.resolve_entry",
        return_value=canned_resolution,
    ):
        exit_code = main(cli_args)

    assert exit_code == 0
def test_cli_graph_outputs_missing_targets(tmp_path: Path):
    """Check that ``graph --missing-only`` reports only the reference with no stored entry."""
    # The seed cites one key that is defined below and one that is not.
    graph_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024},
references = {known2023, missing2022}
}
@article{known2023,
author = {Known, Bob},
title = {Known Paper},
year = {2023}
}
"""
    source_bib = tmp_path / "graph.bib"
    source_bib.write_text(graph_bib, encoding="utf-8")

    ingest_run = run_cli(tmp_path, "ingest", str(source_bib))
    assert ingest_run.returncode == 0

    graph_run = run_cli(tmp_path, "graph", "seed2024", "--missing-only")
    assert graph_run.returncode == 0

    edges = json.loads(graph_run.stdout)
    assert len(edges) == 1
    only_edge = edges[0]
    assert only_edge["target_citation_key"] == "missing2022"
    assert only_edge["target_exists"] is False
def test_cli_expand_with_mocked_crossref(tmp_path: Path):
    """Run ``expand`` in-process with ``CrossrefExpander.expand_entry_references`` mocked.

    The seed entry is ingested through the subprocess helper; ``expand`` is
    then invoked via ``main`` so the patch applies. Only the exit code is
    asserted.
    """
    from citegeist.expand import ExpansionResult

    seed_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024},
doi = {10.1000/seed-doi}
}
"""
    source_bib = tmp_path / "expand.bib"
    source_bib.write_text(seed_bib, encoding="utf-8")

    ingest_run = run_cli(tmp_path, "ingest", str(source_bib))
    assert ingest_run.returncode == 0

    # Canned expander answer: one discovered reference for the seed.
    canned_results = [
        ExpansionResult(
            source_citation_key="seed2024",
            discovered_citation_key="doi101000exampleref",
            created_entry=True,
            relation_type="cites",
            source_label="crossref:references:10.1000/seed-doi",
        )
    ]

    with patch(
        "citegeist.cli.CrossrefExpander.expand_entry_references",
        return_value=canned_results,
    ):
        database = tmp_path / "library.sqlite3"
        exit_code = main(["--db", str(database), "expand", "seed2024"])

    assert exit_code == 0
def test_cli_expand_with_mocked_openalex(tmp_path: Path):
    """Run ``expand --source openalex --relation cites`` in-process with ``OpenAlexExpander.expand_entry`` mocked.

    The seed entry is ingested through the subprocess helper; ``expand`` is
    then invoked via ``main`` so the patch applies. Only the exit code is
    asserted.
    """
    from citegeist.expand import ExpansionResult

    seed_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024},
doi = {10.1000/seed-doi}
}
"""
    source_bib = tmp_path / "expand-openalex.bib"
    source_bib.write_text(seed_bib, encoding="utf-8")

    ingest_run = run_cli(tmp_path, "ingest", str(source_bib))
    assert ingest_run.returncode == 0

    # Canned expander answer: one discovered work for the seed.
    canned_results = [
        ExpansionResult(
            source_citation_key="seed2024",
            discovered_citation_key="openalexw12345",
            created_entry=True,
            relation_type="cites",
            source_label="openalex:cites:WSEED",
        )
    ]

    with patch(
        "citegeist.cli.OpenAlexExpander.expand_entry",
        return_value=canned_results,
    ):
        database = tmp_path / "library.sqlite3"
        cli_args = [
            "--db",
            str(database),
            "expand",
            "seed2024",
            "--source",
            "openalex",
            "--relation",
            "cites",
        ]
        exit_code = main(cli_args)

    assert exit_code == 0