# Listing metadata: 2107 lines, 65 KiB, Python
from __future__ import annotations

import io
import json
import os
import subprocess
import sys
from contextlib import redirect_stderr, redirect_stdout
from pathlib import Path
from unittest.mock import patch

from citegeist.cli import main
from citegeist.examples.talkorigins import (
    TalkOriginsBatchExport,
    TalkOriginsCorrectionResult,
    TalkOriginsDuplicateCluster,
    TalkOriginsEnrichmentResult,
    TalkOriginsIngestReport,
    TalkOriginsReviewExport,
    TalkOriginsTopicPhraseSuggestion,
    TalkOriginsValidationReport,
)
|
|
|
|
|
|
# Two-entry BibTeX fixture shared by the CLI tests: an @article whose
# ``references`` field names the @inproceedings entry that follows it.
# The literal holds only BibTeX text (no stray "|" filler lines).
SAMPLE_BIB = """
@article{smith2024graphs,
author = {Smith, Jane and Doe, Alex},
title = {Graph-first bibliography augmentation},
year = {2024},
abstract = {We study citation graphs for literature discovery.},
references = {miller2023search}
}

@inproceedings{miller2023search,
author = {Miller, Sam},
title = {Semantic search for research corpora},
year = {2023},
abstract = {Dense retrieval improves recall for academic search.}
}
"""
|
|
|
|
|
|
def run_cli(tmp_path: Path, *args: str) -> subprocess.CompletedProcess[str]:
    """Run ``python -m citegeist`` in a child process against a per-test database.

    Args:
        tmp_path: Directory in which ``library.sqlite3`` is located.
        *args: CLI arguments appended after ``--db <database>``.

    Returns:
        The completed process with stdout and stderr captured as text. A
        non-zero exit status is reported via ``returncode``, never raised.
    """
    database = tmp_path / "library.sqlite3"
    # Inherit the parent environment instead of replacing it wholesale: an
    # explicit ``env`` must carry everything the child needs, and on Windows
    # the interpreter cannot start without ``SystemRoot``. PYTHONPATH is still
    # forced to "src", exactly as before.
    env = {**os.environ, "PYTHONPATH": "src"}
    command = [sys.executable, "-m", "citegeist", "--db", str(database), *args]
    return subprocess.run(
        command,
        # Two levels up from this file; the relative "src" entry in
        # PYTHONPATH is resolved against this working directory.
        cwd=Path(__file__).resolve().parents[1],
        env=env,
        capture_output=True,
        text=True,
        check=False,
    )
|
|
|
|
|
|
def test_cli_ingest_show_search_and_export(tmp_path: Path):
    """Round-trip the sample bibliography through ingest, show, search and export."""
    source_bib = tmp_path / "input.bib"
    source_bib.write_text(SAMPLE_BIB, encoding="utf-8")

    # Ingest succeeds and mentions the article key.
    ingested = run_cli(tmp_path, "ingest", str(source_bib))
    assert ingested.returncode == 0
    assert "smith2024graphs" in ingested.stdout

    # ``show`` emits JSON describing the requested entry.
    shown = run_cli(tmp_path, "show", "smith2024graphs")
    assert shown.returncode == 0
    record = json.loads(shown.stdout)
    assert record["citation_key"] == "smith2024graphs"

    # A term from the second entry's title finds that entry.
    found = run_cli(tmp_path, "search", "semantic")
    assert found.returncode == 0
    assert "miller2023search" in found.stdout

    # ``export --output`` writes BibTeX to the requested file.
    target = tmp_path / "exported.bib"
    exported = run_cli(tmp_path, "export", "--output", str(target))
    assert exported.returncode == 0
    assert "@article{smith2024graphs," in target.read_text(encoding="utf-8")
|
|
|
|
|
|
def test_cli_export_skips_stub_entries_by_default_but_can_include_them(tmp_path: Path):
    """Plain ``export`` omits the stub entry; naming it or ``--include-stubs`` emits it."""
    stub_header = "@misc{stubdoi,"
    library_bib = tmp_path / "input.bib"
    library_bib.write_text(
        """
@misc{stubdoi,
title = {Referenced work 6},
doi = {10.1200/JCO.2002.04.117},
url = {https://doi.org/10.1200/JCO.2002.04.117}
}

@article{realentry,
author = {Smith, Jane},
title = {Real Entry},
year = {2024},
doi = {10.1000/real}
}
""",
        encoding="utf-8",
    )

    ingested = run_cli(tmp_path, "ingest", str(library_bib))
    assert ingested.returncode == 0

    # Default export: only the complete record appears.
    plain = run_cli(tmp_path, "export")
    assert plain.returncode == 0
    assert "@article{realentry," in plain.stdout
    assert stub_header not in plain.stdout

    # Asking for the stub by key exports it.
    by_key = run_cli(tmp_path, "export", "stubdoi")
    assert by_key.returncode == 0
    assert stub_header in by_key.stdout

    # The opt-in flag exports it as well.
    with_stubs = run_cli(tmp_path, "export", "--include-stubs")
    assert with_stubs.returncode == 0
    assert stub_header in with_stubs.stdout
|
|
|
|
|
|
def test_cli_provenance_and_status_updates(tmp_path: Path):
    """Ingest options show up in ``show --provenance``; ``set-status`` echoes the new status."""
    source_bib = tmp_path / "input.bib"
    source_bib.write_text(SAMPLE_BIB, encoding="utf-8")

    ingest_args = (
        "ingest",
        "--status",
        "draft",
        "--source-label",
        "tests/input.bib",
        str(source_bib),
    )
    ingested = run_cli(tmp_path, *ingest_args)
    assert ingested.returncode == 0

    shown = run_cli(tmp_path, "show", "--provenance", "smith2024graphs")
    assert shown.returncode == 0
    record = json.loads(shown.stdout)
    assert record["review_status"] == "draft"
    first_provenance = record["field_provenance"][0]
    assert first_provenance["source_label"] == "tests/input.bib"

    updated = run_cli(tmp_path, "set-status", "smith2024graphs", "reviewed")
    assert updated.returncode == 0
    assert "reviewed" in updated.stdout
|
|
|
|
|
|
def test_cli_verify_string_outputs_json_with_audit_fields(tmp_path: Path):
    """``verify --string --format json`` prints the stubbed result as a JSON list."""
    from citegeist.bibtex import BibEntry
    from citegeist.verify import VerificationResult

    query = '"Graph-first bibliography augmentation" Smith 2024'
    label = "crossref:search:Graph-first bibliography augmentation"
    matched = BibEntry(
        entry_type="article",
        citation_key="smith2024graphs",
        fields={
            "author": "Smith, Jane",
            "title": "Graph-first bibliography augmentation",
            "year": "2024",
            "doi": "10.1000/example-doi",
        },
    )
    canned = VerificationResult(
        query=query,
        context="citation graphs",
        status="high_confidence",
        confidence=0.82,
        entry=matched,
        source_label=label,
        alternates=[],
        input_type="string",
        input_key=None,
    )
    argv = [
        "--db",
        str(tmp_path / "library.sqlite3"),
        "verify",
        "--string",
        query,
        "--context",
        "citation graphs",
        "--format",
        "json",
    ]

    captured = io.StringIO()
    with (
        patch("citegeist.cli.BibliographyVerifier.verify_string", return_value=canned),
        redirect_stdout(captured),
    ):
        exit_code = main(argv)

    assert exit_code == 0
    first = json.loads(captured.getvalue())[0]
    assert first["status"] == "high_confidence"
    assert first["source_label"] == label
    assert first["entry"]["citation_key"] == "smith2024graphs"
|
|
|
|
|
|
def test_cli_verify_bib_outputs_json(tmp_path: Path):
    """``verify --bib --format json`` prints the stubbed per-entry results as JSON."""
    from citegeist.bibtex import BibEntry
    from citegeist.verify import VerificationResult

    partial_bib = tmp_path / "partial.bib"
    partial_bib.write_text(
        """
@misc{roughentry,
title = {Graph-first bibliography augmentation},
year = {2024}
}
""",
        encoding="utf-8",
    )

    candidate = BibEntry(
        entry_type="article",
        citation_key="candidate2024",
        fields={
            "title": "Graph-first bibliography augmentation",
            "year": "2024",
        },
    )
    canned = [
        VerificationResult(
            query="Graph-first bibliography augmentation 2024",
            context="",
            status="ambiguous",
            confidence=0.61,
            entry=candidate,
            source_label="openalex:search:Graph-first bibliography augmentation",
            alternates=[],
            input_type="bib",
            input_key="roughentry",
        )
    ]
    argv = [
        "--db",
        str(tmp_path / "library.sqlite3"),
        "verify",
        "--bib",
        str(partial_bib),
        "--format",
        "json",
    ]

    captured = io.StringIO()
    with (
        patch("citegeist.cli.BibliographyVerifier.verify_bib_file", return_value=canned),
        redirect_stdout(captured),
    ):
        exit_code = main(argv)

    assert exit_code == 0
    first = json.loads(captured.getvalue())[0]
    assert first["status"] == "ambiguous"
    assert first["input_key"] == "roughentry"
    assert first["entry"]["citation_key"] == "candidate2024"
|
|
|
|
|
|
def test_cli_verify_rejects_incomplete_llm_config(tmp_path: Path):
    """``verify --llm`` without base URL and model exits 1 and explains why on stderr."""
    argv = [
        "--db",
        str(tmp_path / "library.sqlite3"),
        "verify",
        "--string",
        "Evans 1960",
        "--llm",
    ]

    captured_err = io.StringIO()
    with redirect_stderr(captured_err):
        exit_code = main(argv)

    assert exit_code == 1
    message = captured_err.getvalue()
    assert "--llm requires --llm-base-url and --llm-model" in message
|
|
|
|
|
|
def test_cli_verify_builds_llm_config(tmp_path: Path):
    """The ``--llm-*`` flags reach the verifier constructor as an ``llm_config`` keyword."""
    from citegeist.bibtex import BibEntry
    from citegeist.verify import VerificationResult

    canned = VerificationResult(
        query="Evans 1960",
        context="marine mammals",
        status="ambiguous",
        confidence=0.6,
        entry=BibEntry(entry_type="misc", citation_key="evans1960", fields={"title": "Evans 1960"}),
        source_label="none",
        alternates=[],
        input_type="string",
        input_key=None,
    )
    argv = [
        "--db",
        str(tmp_path / "library.sqlite3"),
        "verify",
        "--string",
        "Evans 1960",
        "--llm",
        "--llm-base-url",
        "http://localhost:11434",
        "--llm-model",
        "qwen3",
        "--llm-role",
        "rerank",
        "--format",
        "json",
    ]

    captured = io.StringIO()
    with patch("citegeist.cli.BibliographyVerifier") as mocked_verifier_cls:
        # The instance the CLI builds is the mock class's return value.
        mocked_verifier_cls.return_value.verify_string.return_value = canned
        with redirect_stdout(captured):
            exit_code = main(argv)

    assert exit_code == 0
    llm_config = mocked_verifier_cls.call_args.kwargs["llm_config"]
    assert llm_config.base_url == "http://localhost:11434"
    assert llm_config.model == "qwen3"
    assert llm_config.role == "rerank"
|
|
|
|
|
|
def test_cli_sync_jabref_ingests_resolves_and_exports(tmp_path: Path):
    """``sync-jabref`` ingests the file, resolves each key, writes ``--output`` and reports counts."""
    from citegeist.bibtex import BibEntry

    library_bib = tmp_path / "jabref-library.bib"
    library_bib.write_text(
        """
@article{smith2024graphs,
author = {Smith, Jane},
title = {Graph-first bibliography augmentation},
year = {2024}
}
""",
        encoding="utf-8",
    )
    enriched_bib = tmp_path / "jabref-library.enriched.bib"

    class FakeStore:
        """Store double that records every ingest call it receives."""

        def __init__(self) -> None:
            self.ingest_calls: list[tuple[str, str, str]] = []
            self.closed = False

        def ingest_bibtex(self, text: str, source_label: str, review_status: str) -> list[str]:
            self.ingest_calls.append((text, source_label, review_status))
            return ["smith2024graphs"]

        def get_bib_entry(self, citation_key: str):
            return BibEntry("article", citation_key, {"title": "Resolved Work"})

        def get_entry(self, citation_key: str):
            return {"citation_key": citation_key, "review_status": "enriched"}

        def get_field_conflicts(self, citation_key: str, status: str | None = None):
            return []

        def get_field_provenance(self, citation_key: str):
            return []

        def close(self) -> None:
            self.closed = True

    fake_store = FakeStore()
    resolve_calls: list[str] = []

    def record_resolve(store, resolver, citation_key):
        # Stand-in for _resolve_one: remember the key and report success.
        resolve_calls.append(citation_key)
        return True

    argv = [
        "--db",
        str(tmp_path / "library.sqlite3"),
        "sync-jabref",
        str(library_bib),
        "--output",
        str(enriched_bib),
        "--status",
        "draft",
        "--source-label",
        "jabref:test",
    ]

    captured = io.StringIO()
    with (
        patch("citegeist.cli.BibliographyStore", return_value=fake_store),
        patch("citegeist.cli.MetadataResolver"),
        patch("citegeist.cli.render_bibtex", return_value="@article{smith2024graphs,\n title = {Resolved Work}\n}"),
        patch("citegeist.cli._resolve_one", side_effect=record_resolve),
        redirect_stdout(captured),
    ):
        exit_code = main(argv)

    assert exit_code == 0
    # Only the label and status are pinned; the ingested text is not checked.
    assert fake_store.ingest_calls[0][1:] == ("jabref:test", "draft")
    assert resolve_calls == ["smith2024graphs"]
    assert "@article{smith2024graphs," in enriched_bib.read_text(encoding="utf-8")

    summary = json.loads(captured.getvalue())
    assert summary["imported_count"] == 1
    assert summary["resolved_count"] == 1
    assert summary["failed_resolve_count"] == 0
    assert summary["skipped_resolution"] is False
|
|
|
|
|
|
def test_cli_sync_jabref_can_skip_resolution(tmp_path: Path):
    """With ``--no-resolve`` the resolver hook is never called and the summary says so."""
    from citegeist.bibtex import BibEntry

    library_bib = tmp_path / "jabref-library.bib"
    library_bib.write_text("@article{seed2024, title = {Seed}}\n", encoding="utf-8")
    enriched_bib = tmp_path / "jabref-library.enriched.bib"

    class FakeStore:
        """Minimal store double returning one fixed entry."""

        def ingest_bibtex(self, text: str, source_label: str, review_status: str) -> list[str]:
            return ["seed2024"]

        def get_bib_entry(self, citation_key: str):
            return BibEntry("article", citation_key, {"title": "Seed"})

        def get_entry(self, citation_key: str):
            return {"citation_key": citation_key, "review_status": "draft"}

        def get_field_conflicts(self, citation_key: str, status: str | None = None):
            return []

        def get_field_provenance(self, citation_key: str):
            return []

        def close(self) -> None:
            return None

    argv = [
        "--db",
        str(tmp_path / "library.sqlite3"),
        "sync-jabref",
        str(library_bib),
        "--output",
        str(enriched_bib),
        "--no-resolve",
    ]

    captured = io.StringIO()
    with (
        patch("citegeist.cli.BibliographyStore", return_value=FakeStore()),
        patch("citegeist.cli.render_bibtex", return_value="@article{seed2024,\n title = {Seed}\n}"),
        patch("citegeist.cli._resolve_one") as mocked_resolve,
        redirect_stdout(captured),
    ):
        exit_code = main(argv)

    assert exit_code == 0
    mocked_resolve.assert_not_called()
    summary = json.loads(captured.getvalue())
    assert summary["skipped_resolution"] is True
    assert summary["resolved_count"] == 0
|
|
|
|
|
|
def test_cli_sync_jabref_can_annotate_review_fields_and_write_in_place(tmp_path: Path):
    """``--in-place --annotate-review`` rewrites the input file with ``x_citegeist_*`` fields."""
    from citegeist.bibtex import BibEntry

    library_bib = tmp_path / "jabref-library.bib"
    library_bib.write_text("@article{seed2024, title = {Seed}}\n", encoding="utf-8")

    class FakeStore:
        """Store double exposing one open conflict and one provenance row."""

        def ingest_bibtex(self, text: str, source_label: str, review_status: str) -> list[str]:
            return ["seed2024"]

        def get_bib_entry(self, citation_key: str):
            return BibEntry("article", citation_key, {"title": "Seed"})

        def get_entry(self, citation_key: str):
            return {"citation_key": citation_key, "review_status": "enriched"}

        def get_field_conflicts(self, citation_key: str, status: str | None = None):
            return [{"field_name": "title"}]

        def get_field_provenance(self, citation_key: str):
            return [{"source_label": "pubmed:pmid:12345678"}]

        def close(self) -> None:
            return None

    def fake_render(entries):
        # Render only the first entry, echoing whichever review fields the CLI attached.
        fields = entries[0].fields
        rendered_lines = [
            "@article{seed2024,",
            f" title = {{{fields['title']}}},",
            f" x_citegeist_review_status = {{{fields.get('x_citegeist_review_status', '')}}},",
            f" x_citegeist_open_conflicts = {{{fields.get('x_citegeist_open_conflicts', '')}}},",
            f" x_citegeist_last_source = {{{fields.get('x_citegeist_last_source', '')}}}",
            "}",
        ]
        return "\n".join(rendered_lines)

    argv = [
        "--db",
        str(tmp_path / "library.sqlite3"),
        "sync-jabref",
        str(library_bib),
        "--in-place",
        "--annotate-review",
    ]

    captured = io.StringIO()
    with (
        patch("citegeist.cli.BibliographyStore", return_value=FakeStore()),
        patch("citegeist.cli.render_bibtex", side_effect=fake_render),
        patch("citegeist.cli._resolve_one", return_value=True),
        redirect_stdout(captured),
    ):
        exit_code = main(argv)

    assert exit_code == 0
    rewritten = library_bib.read_text(encoding="utf-8")
    assert "x_citegeist_review_status" in rewritten
    assert "x_citegeist_open_conflicts" in rewritten
    assert "x_citegeist_last_source" in rewritten

    summary = json.loads(captured.getvalue())
    assert summary["in_place"] is True
    assert summary["annotated_review"] is True
|
|
|
|
|
|
def test_cli_resolve_updates_entry(tmp_path: Path):
    """``resolve`` with a stubbed resolver leaves a ``title`` conflict visible via ``show --conflicts``."""
    from citegeist.bibtex import BibEntry
    from citegeist.resolve import Resolution

    source_bib = tmp_path / "input.bib"
    source_bib.write_text(
        """
@article{smith2024graphs,
author = {Smith, Jane},
title = {Graph-first bibliography augmentation},
year = {2024},
doi = {10.1000/example-doi}
}
""",
        encoding="utf-8",
    )

    ingested = run_cli(tmp_path, "ingest", str(source_bib))
    assert ingested.returncode == 0

    # The resolver proposes a different title plus a journal for the same DOI.
    resolved_entry = BibEntry(
        entry_type="article",
        citation_key="resolvedkey",
        fields={
            "author": "Smith, Jane",
            "title": "Resolved Graph-first bibliography augmentation",
            "year": "2024",
            "doi": "10.1000/example-doi",
            "journal": "Journal of Graph Studies",
        },
    )
    resolution = Resolution(
        entry=resolved_entry,
        source_type="resolver",
        source_label="crossref:doi:10.1000/example-doi",
    )
    argv = ["--db", str(tmp_path / "library.sqlite3"), "resolve", "smith2024graphs"]

    with patch("citegeist.cli.MetadataResolver.resolve_entry", return_value=resolution):
        exit_code = main(argv)

    assert exit_code == 0
    shown = run_cli(tmp_path, "show", "--conflicts", "smith2024graphs")
    assert shown.returncode == 0
    record = json.loads(shown.stdout)
    assert record["field_conflicts"][0]["field_name"] == "title"
|
|
|
|
|
|
def test_cli_resolve_stubs_preview_lists_doi_stub_candidates(tmp_path: Path):
    """``resolve-stubs --doi-only --preview`` lists only the stub entry, not the complete one."""
    source_bib = tmp_path / "input.bib"
    source_bib.write_text(
        """
@misc{stubdoi,
title = {Referenced work 6},
doi = {10.1200/JCO.2002.04.117},
url = {https://doi.org/10.1200/JCO.2002.04.117}
}

@article{complete,
author = {Smith, Jane},
title = {Complete Record},
year = {2024},
doi = {10.1000/complete}
}
""",
        encoding="utf-8",
    )
    ingested = run_cli(tmp_path, "ingest", str(source_bib))
    assert ingested.returncode == 0

    preview = run_cli(tmp_path, "resolve-stubs", "--doi-only", "--preview", "--limit", "10")
    assert preview.returncode == 0
    candidates = json.loads(preview.stdout)
    listed_keys = [candidate["citation_key"] for candidate in candidates]
    assert listed_keys == ["stubdoi"]
    assert candidates[0]["title"] == "Referenced work 6"
|
|
|
|
|
|
def test_cli_resolve_stubs_preview_can_target_all_misc_entries(tmp_path: Path):
    """With ``--all-misc`` the preview lists the ``@misc`` entry and skips the ``@article``."""
    source_bib = tmp_path / "input.bib"
    source_bib.write_text(
        """
@misc{miscwithtitle,
author = {Doe, Alex},
title = {Avida Conference Record},
year = {2005},
doi = {10.1117/12.512613}
}

@article{complete,
author = {Smith, Jane},
title = {Complete Record},
year = {2024},
doi = {10.1000/complete}
}
""",
        encoding="utf-8",
    )
    ingested = run_cli(tmp_path, "ingest", str(source_bib))
    assert ingested.returncode == 0

    preview = run_cli(tmp_path, "resolve-stubs", "--doi-only", "--all-misc", "--preview", "--limit", "10")
    assert preview.returncode == 0
    listed_keys = [candidate["citation_key"] for candidate in json.loads(preview.stdout)]
    assert listed_keys == ["miscwithtitle"]
|
|
|
|
|
|
def test_cli_resolve_stubs_enriches_matching_candidates(tmp_path: Path):
    """``resolve-stubs --doi-only`` replaces the stub's title and marks the entry ``enriched``."""
    bib_path = tmp_path / "input.bib"
    bib_path.write_text(
        """
@misc{stubdoi,
title = {Referenced work 6},
doi = {10.1200/JCO.2002.04.117},
url = {https://doi.org/10.1200/JCO.2002.04.117}
}
""",
        encoding="utf-8",
    )
    assert run_cli(tmp_path, "ingest", str(bib_path)).returncode == 0

    from citegeist.bibtex import BibEntry
    from citegeist.resolve import Resolution

    database = tmp_path / "library.sqlite3"

    with patch("citegeist.cli.MetadataResolver.resolve_entry") as mocked_resolve:
        mocked_resolve.return_value = Resolution(
            entry=BibEntry(
                entry_type="article",
                citation_key="resolvedkey",
                fields={
                    "author": "Doe, Alex",
                    "title": "Resolved Work",
                    "year": "2002",
                    "doi": "10.1200/JCO.2002.04.117",
                    "journal": "Journal of Clinical Oncology",
                },
            ),
            source_type="resolver",
            source_label="crossref:doi:10.1200/JCO.2002.04.117",
        )
        exit_code = main(
            [
                "--db",
                str(database),
                "resolve-stubs",
                "--doi-only",
                "--limit",
                "10",
            ]
        )

    assert exit_code == 0
    show = run_cli(tmp_path, "show", "stubdoi")
    # Check the exit status first so a CLI failure is reported as such
    # instead of as an opaque JSON decode error on empty output.
    assert show.returncode == 0
    payload = json.loads(show.stdout)
    assert payload["title"] == "Resolved Work"
    assert payload["review_status"] == "enriched"
|
|
|
|
|
|
def test_cli_resolve_stubs_can_enrich_all_misc_entries(tmp_path: Path):
    """``--all-misc`` enrichment upgrades the entry type and adds ``booktitle``.

    The existing title is kept; the resolver's differing title is expected to
    appear among the recorded field conflicts.
    """
    bib_path = tmp_path / "input.bib"
    bib_path.write_text(
        """
@misc{miscwithtitle,
author = {Doe, Alex},
title = {Avida Conference Record},
year = {2005},
doi = {10.1117/12.512613}
}
""",
        encoding="utf-8",
    )
    assert run_cli(tmp_path, "ingest", str(bib_path)).returncode == 0

    from citegeist.bibtex import BibEntry
    from citegeist.resolve import Resolution

    database = tmp_path / "library.sqlite3"

    with patch("citegeist.cli.MetadataResolver.resolve_entry") as mocked_resolve:
        mocked_resolve.return_value = Resolution(
            entry=BibEntry(
                entry_type="inproceedings",
                citation_key="resolvedkey",
                fields={
                    "author": "Koza, J. R.",
                    "title": "Genetic Programming IV: Routine Human-Competitive Machine Intelligence",
                    "year": "2005",
                    "booktitle": "Genetic and Evolutionary Computation Conference",
                    "doi": "10.1117/12.512613",
                },
            ),
            source_type="resolver",
            source_label="crossref:doi:10.1117/12.512613",
        )
        exit_code = main(
            [
                "--db",
                str(database),
                "resolve-stubs",
                "--doi-only",
                "--all-misc",
                "--limit",
                "10",
            ]
        )

    assert exit_code == 0
    show = run_cli(tmp_path, "show", "--conflicts", "miscwithtitle")
    # Check the exit status first so a CLI failure is reported as such
    # instead of as an opaque JSON decode error on empty output.
    assert show.returncode == 0
    payload = json.loads(show.stdout)
    assert payload["entry_type"] == "inproceedings"
    assert payload["title"] == "Avida Conference Record"
    assert payload["booktitle"] == "Genetic and Evolutionary Computation Conference"
    assert "title" in {item["field_name"] for item in payload["field_conflicts"]}
|
|
|
|
|
|
def test_cli_resolve_stubs_reports_progress_on_stderr(tmp_path: Path):
    """Progress lines go to stderr while the key/source result row goes to stdout."""
    from citegeist.bibtex import BibEntry
    from citegeist.resolve import Resolution

    source_bib = tmp_path / "input.bib"
    source_bib.write_text(
        """
@misc{stubdoi,
title = {Referenced work 6},
doi = {10.1200/JCO.2002.04.117},
url = {https://doi.org/10.1200/JCO.2002.04.117}
}
""",
        encoding="utf-8",
    )
    ingested = run_cli(tmp_path, "ingest", str(source_bib))
    assert ingested.returncode == 0

    resolution = Resolution(
        entry=BibEntry(
            entry_type="article",
            citation_key="resolvedkey",
            fields={"title": "Resolved Work", "year": "2002", "doi": "10.1200/JCO.2002.04.117"},
        ),
        source_type="resolver",
        source_label="crossref:doi:10.1200/JCO.2002.04.117",
    )
    argv = [
        "--db",
        str(tmp_path / "library.sqlite3"),
        "resolve-stubs",
        "--doi-only",
        "--limit",
        "10",
    ]

    captured_out = io.StringIO()
    captured_err = io.StringIO()
    with (
        patch("citegeist.cli.MetadataResolver.resolve_entry", return_value=resolution),
        redirect_stdout(captured_out),
        redirect_stderr(captured_err),
    ):
        exit_code = main(argv)

    assert exit_code == 0
    assert "[1/1] resolving candidate: stubdoi" in captured_err.getvalue()
    assert "stubdoi\tcrossref:doi:10.1200/JCO.2002.04.117" in captured_out.getvalue()
|
|
|
|
|
|
def test_cli_resolve_conflicts_updates_status(tmp_path: Path):
    """``resolve-conflicts <key> title accepted`` succeeds and echoes ``accepted``."""
    from citegeist.storage import BibliographyStore

    source_bib = tmp_path / "input.bib"
    source_bib.write_text(
        """
@article{smith2024graphs,
author = {Smith, Jane},
title = {Graph-first bibliography augmentation},
year = {2024}
}
""",
        encoding="utf-8",
    )
    ingested = run_cli(tmp_path, "ingest", str(source_bib))
    assert ingested.returncode == 0

    # Seed one title conflict directly through the store before invoking the CLI.
    title_conflict = {
        "field_name": "title",
        "current_value": "Graph-first bibliography augmentation",
        "proposed_value": "Resolved title",
    }
    store = BibliographyStore(tmp_path / "library.sqlite3")
    try:
        store.record_conflicts(
            "smith2024graphs",
            [title_conflict],
            source_type="resolver",
            source_label="openalex:search:Graph-first bibliography augmentation",
        )
    finally:
        store.close()

    outcome = run_cli(tmp_path, "resolve-conflicts", "smith2024graphs", "title", "accepted")
    assert outcome.returncode == 0
    assert "accepted" in outcome.stdout
|
|
|
|
|
|
def test_cli_apply_conflict_updates_entry_value(tmp_path: Path):
    """``apply-conflict`` writes the conflict's proposed title into the stored entry."""
    bib_path = tmp_path / "input.bib"
    bib_path.write_text(
        """
@article{smith2024graphs,
author = {Smith, Jane},
title = {Graph-first bibliography augmentation},
year = {2024}
}
""",
        encoding="utf-8",
    )
    ingest = run_cli(tmp_path, "ingest", str(bib_path))
    assert ingest.returncode == 0

    from citegeist.storage import BibliographyStore

    database = tmp_path / "library.sqlite3"
    store = BibliographyStore(database)
    try:
        # Seed the conflict that the CLI command is expected to apply.
        store.record_conflicts(
            "smith2024graphs",
            [
                {
                    "field_name": "title",
                    "current_value": "Graph-first bibliography augmentation",
                    "proposed_value": "Resolved Graph-first bibliography augmentation",
                }
            ],
            source_type="resolver",
            source_label="openalex:search:Graph-first bibliography augmentation",
        )
    finally:
        store.close()

    result = run_cli(tmp_path, "apply-conflict", "smith2024graphs", "title")
    assert result.returncode == 0
    assert "applied" in result.stdout

    show = run_cli(tmp_path, "show", "smith2024graphs")
    # Check the exit status first so a CLI failure is reported as such
    # instead of as an opaque JSON decode error on empty output.
    assert show.returncode == 0
    payload = json.loads(show.stdout)
    assert payload["title"] == "Resolved Graph-first bibliography augmentation"
|
|
|
|
|
|
def test_cli_discover_oai_outputs_identity_and_sets():
    """``discover-oai`` exits 0 when the harvester's three discovery calls are stubbed."""
    from citegeist.harvest import OaiMetadataFormat, OaiSet

    identity = {
        "repositoryName": "Example Repository",
        "granularity": "YYYY-MM-DD",
    }
    sets = [OaiSet(set_spec="theses", set_name="Theses", set_description="Graduate theses")]
    formats = [
        OaiMetadataFormat(
            metadata_prefix="oai_dc",
            schema="http://www.openarchives.org/OAI/2.0/oai_dc.xsd",
            metadata_namespace="http://www.openarchives.org/OAI/2.0/oai_dc/",
        )
    ]

    with (
        patch("citegeist.cli.OaiPmhHarvester.identify", return_value=identity),
        patch("citegeist.cli.OaiPmhHarvester.list_sets", return_value=sets),
        patch("citegeist.cli.OaiPmhHarvester.list_metadata_formats", return_value=formats),
    ):
        exit_code = main(["discover-oai", "https://example.edu/oai"])

    assert exit_code == 0
|
|
|
|
|
|
def test_cli_bootstrap_preview_mode(tmp_path):
    """``bootstrap --preview --topic-commit-limit 2`` forwards both options as keyword arguments."""
    argv = [
        "--db",
        str(tmp_path / "library.sqlite3"),
        "bootstrap",
        "--topic",
        "graph topic",
        "--preview",
        "--topic-commit-limit",
        "2",
    ]

    with patch("citegeist.cli.Bootstrapper.bootstrap", return_value=[]) as mocked_bootstrap:
        exit_code = main(argv)

    assert exit_code == 0
    forwarded = mocked_bootstrap.call_args.kwargs
    assert forwarded["preview_only"] is True
    assert forwarded["topic_commit_limit"] == 2
|
|
|
|
|
|
def test_cli_bootstrap_accepts_stored_topic_metadata(tmp_path):
    """The slug, name and stored-phrase flags reach ``bootstrap`` as keyword arguments."""
    argv = [
        "--db",
        str(tmp_path / "library.sqlite3"),
        "bootstrap",
        "--topic",
        "graph topic",
        "--topic-slug",
        "graph-methods",
        "--topic-name",
        "Graph Methods",
        "--store-topic-phrase",
        "graph networks biology",
    ]

    with patch("citegeist.cli.Bootstrapper.bootstrap", return_value=[]) as mocked_bootstrap:
        exit_code = main(argv)

    assert exit_code == 0
    forwarded = mocked_bootstrap.call_args.kwargs
    assert forwarded["topic_slug"] == "graph-methods"
    assert forwarded["topic_name"] == "Graph Methods"
    # ``--store-topic-phrase`` arrives under the ``topic_phrase`` keyword.
    assert forwarded["topic_phrase"] == "graph networks biology"
|
|
|
|
|
|
def test_cli_scrape_talkorigins_accepts_output_dir(tmp_path):
    """``example-talkorigins-scrape`` accepts an output directory and its limit/no-* flags."""
    export = TalkOriginsBatchExport(
        base_url="https://www.talkorigins.org/origins/biblio/",
        output_dir=str(tmp_path),
        topic_count=1,
        entry_count=2,
        jobs_path=str(tmp_path / "jobs.json"),
        manifest_path=str(tmp_path / "manifest.json"),
        seed_sets=[],
    )
    argv = [
        "--db",
        str(tmp_path / "library.sqlite3"),
        "example-talkorigins-scrape",
        str(tmp_path / "talkorigins-out"),
        "--limit-topics",
        "3",
        "--limit-entries-per-topic",
        "10",
        "--no-resume",
        "--no-expand",
    ]

    with patch("citegeist.cli.TalkOriginsScraper.scrape_to_directory", return_value=export):
        exit_code = main(argv)

    assert exit_code == 0
|
|
|
|
|
|
def test_cli_validate_talkorigins_accepts_manifest(tmp_path):
    """``example-talkorigins-validate <manifest>`` exits 0 with a stubbed validation report."""
    manifest = tmp_path / "talkorigins_manifest.json"
    manifest.write_text("{}", encoding="utf-8")

    report = TalkOriginsValidationReport(
        manifest_path=str(manifest),
        topic_count=1,
        entry_count=2,
        parsed_ratio=1.0,
        missing_author_count=0,
        missing_title_count=0,
        missing_year_count=0,
        suspicious_entry_type_count=0,
        suspicious_examples=[],
        duplicate_cluster_count=0,
        duplicate_entry_count=0,
        duplicate_examples=[],
    )

    with patch("citegeist.cli.TalkOriginsScraper.validate_export", return_value=report):
        exit_code = main(["example-talkorigins-validate", str(manifest)])

    assert exit_code == 0
|
|
|
|
|
|
def test_cli_suggest_talkorigins_phrases_writes_output(tmp_path):
    """``example-talkorigins-suggest-phrases --output`` writes the suggestions as JSON."""
    manifest = tmp_path / "talkorigins_manifest.json"
    manifest.write_text("{}", encoding="utf-8")
    destination = tmp_path / "phrases.json"

    suggestions = [
        TalkOriginsTopicPhraseSuggestion(
            slug="abiogenesis",
            topic="Abiogenesis",
            entry_count=2,
            suggested_phrase="Abiogenesis prebiotic chemistry ribozyme",
            keywords=["prebiotic", "chemistry", "ribozyme"],
            review_required=True,
            review_reasons=["small_topic"],
        )
    ]
    argv = [
        "example-talkorigins-suggest-phrases",
        str(manifest),
        "--topic",
        "abiogenesis",
        "--output",
        str(destination),
    ]

    with patch("citegeist.cli.TalkOriginsScraper.suggest_topic_phrases", return_value=suggestions):
        exit_code = main(argv)

    assert exit_code == 0
    written = json.loads(destination.read_text(encoding="utf-8"))
    assert written[0]["slug"] == "abiogenesis"
|
|
|
|
|
|
def test_cli_duplicates_talkorigins_accepts_manifest(tmp_path):
    """``example-talkorigins-duplicates`` accepts a manifest plus its filter and preview flags."""
    manifest = tmp_path / "talkorigins_manifest.json"
    manifest.write_text("{}", encoding="utf-8")

    member = {
        "citation_key": "dup1",
        "title": "Duplicate Paper",
        "author": "Smith, Jane",
        "year": "1999",
        "seed_bib": "a.bib",
        "topic": "Abiogenesis",
        "topic_slug": "abiogenesis",
    }
    canonical = {
        "citation_key": "dup1",
        "entry_type": "article",
        "field_count": 3,
        "fields": {"title": "Duplicate Paper"},
        "weak_reasons": [],
    }
    clusters = [
        TalkOriginsDuplicateCluster(
            key="smith|1999|duplicate paper",
            count=2,
            items=[member],
            canonical=canonical,
        )
    ]
    argv = [
        "example-talkorigins-duplicates",
        str(manifest),
        "--topic",
        "abiogenesis",
        "--match",
        "duplicate",
        "--preview",
        "--weak-only",
    ]

    with patch("citegeist.cli.TalkOriginsScraper.inspect_duplicate_clusters", return_value=clusters):
        exit_code = main(argv)

    assert exit_code == 0
|
|
|
|
|
|
def test_cli_ingest_talkorigins_accepts_manifest(tmp_path):
    """``example-talkorigins-ingest <manifest>`` exits 0 with a stubbed ingest report."""
    manifest = tmp_path / "talkorigins_manifest.json"
    manifest.write_text("{}", encoding="utf-8")

    report = TalkOriginsIngestReport(
        manifest_path=str(manifest),
        topic_count=1,
        raw_entry_count=2,
        stored_entry_count=1,
        duplicate_cluster_count=1,
        duplicate_entry_count=2,
        canonicalized_count=1,
    )
    argv = ["--db", str(tmp_path / "library.sqlite3"), "example-talkorigins-ingest", str(manifest)]

    with patch("citegeist.cli.TalkOriginsScraper.ingest_export", return_value=report):
        exit_code = main(argv)

    assert exit_code == 0
|
|
|
|
|
|
def test_cli_enrich_talkorigins_accepts_manifest(tmp_path):
    """The enrich command accepts a manifest with limit/apply/unsafe flags.

    ``TalkOriginsScraper.enrich_weak_canonicals`` is patched to return one
    canned ``TalkOriginsEnrichmentResult``; the test checks that ``main``
    exits with 0.
    """
    # ``patch`` is imported at module level, so no function-local import is needed.
    database = tmp_path / "library.sqlite3"
    manifest = tmp_path / "talkorigins_manifest.json"
    manifest.write_text("{}", encoding="utf-8")

    canned_result = TalkOriginsEnrichmentResult(
        key="smith|1999|duplicate paper",
        citation_key="dup1",
        weak_reasons_before=["missing:doi"],
        resolved=True,
        applied=False,
        source_label="crossref:search:Duplicate Paper",
        weak_reasons_after=[],
        conflicts=[],
        error="",
    )

    with patch("citegeist.cli.TalkOriginsScraper.enrich_weak_canonicals") as mocked_enrich:
        mocked_enrich.return_value = [canned_result]
        exit_code = main(
            [
                "--db",
                str(database),
                "example-talkorigins-enrich",
                str(manifest),
                "--limit",
                "5",
                "--apply",
                "--allow-unsafe-search-matches",
            ]
        )

    assert exit_code == 0
|
|
|
|
|
|
def test_cli_review_talkorigins_writes_output(tmp_path):
    """The review command writes its export to the ``--output`` path.

    ``TalkOriginsScraper.build_review_export`` is patched to return a canned
    ``TalkOriginsReviewExport``; the test checks the exit code and that the
    output file was created.
    """
    # ``patch`` is imported at module level, so no function-local import is needed.
    database = tmp_path / "library.sqlite3"
    manifest = tmp_path / "talkorigins_manifest.json"
    manifest.write_text("{}", encoding="utf-8")
    output = tmp_path / "review.json"

    canned_export = TalkOriginsReviewExport(
        manifest_path=str(manifest),
        item_count=1,
        items=[{"key": "smith|1999|duplicate paper", "canonical": {}, "enrichment": {}}],
    )

    with patch("citegeist.cli.TalkOriginsScraper.build_review_export") as mocked_review:
        mocked_review.return_value = canned_export
        exit_code = main(
            [
                "--db",
                str(database),
                "example-talkorigins-review",
                str(manifest),
                "--output",
                str(output),
            ]
        )

    assert exit_code == 0
    assert output.exists()
|
|
|
|
|
|
def test_cli_apply_talkorigins_corrections_accepts_files(tmp_path):
    """The apply-corrections command accepts manifest and corrections files.

    ``TalkOriginsScraper.apply_review_corrections`` is patched to return one
    canned ``TalkOriginsCorrectionResult``; the test checks that ``main``
    exits with 0.
    """
    # ``patch`` is imported at module level, so no function-local import is needed.
    database = tmp_path / "library.sqlite3"
    manifest = tmp_path / "talkorigins_manifest.json"
    corrections = tmp_path / "corrections.json"
    manifest.write_text("{}", encoding="utf-8")
    corrections.write_text('{"corrections": []}', encoding="utf-8")

    canned_result = TalkOriginsCorrectionResult(
        key="smith|1999|duplicate paper",
        citation_key="dup1",
        applied=True,
        error="",
    )

    with patch("citegeist.cli.TalkOriginsScraper.apply_review_corrections") as mocked_apply:
        mocked_apply.return_value = [canned_result]
        exit_code = main(
            [
                "--db",
                str(database),
                "example-talkorigins-apply-corrections",
                str(manifest),
                str(corrections),
            ]
        )

    assert exit_code == 0
|
|
|
|
|
|
def test_cli_topics_and_topic_entries(tmp_path: Path):
    """List topics and a topic's entries after tagging an ingested entry.

    Ingests one entry through the CLI, attaches it to the ``graph-methods``
    topic directly via ``BibliographyStore``, then checks the JSON printed by
    the ``topics`` and ``topic-entries`` commands.
    """
    seed_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024}
}
"""
    bib_file = tmp_path / "input.bib"
    bib_file.write_text(seed_bib, encoding="utf-8")
    ingest_run = run_cli(tmp_path, "ingest", str(bib_file))
    assert ingest_run.returncode == 0

    from citegeist.storage import BibliographyStore

    # Same database file that run_cli() points the CLI at.
    store = BibliographyStore(tmp_path / "library.sqlite3")
    topic_fields = {
        "topic_slug": "graph-methods",
        "topic_name": "Graph Methods",
        "source_type": "talkorigins",
        "source_url": "https://example.org/topics/graph-methods",
        "source_label": "topic-seed",
    }
    try:
        store.add_entry_topic("seed2024", **topic_fields)
        store.connection.commit()
    finally:
        store.close()

    topics_run = run_cli(tmp_path, "topics")
    assert topics_run.returncode == 0
    listed_topics = json.loads(topics_run.stdout)
    assert listed_topics[0]["slug"] == "graph-methods"

    entries_run = run_cli(tmp_path, "topic-entries", "graph-methods")
    assert entries_run.returncode == 0
    entries_doc = json.loads(entries_run.stdout)
    assert entries_doc["topic"]["slug"] == "graph-methods"
    assert entries_doc["entries"][0]["citation_key"] == "seed2024"
|
|
|
|
|
|
def test_cli_can_set_topic_phrase(tmp_path: Path):
    """``set-topic-phrase`` reports the new expansion phrase for a topic.

    The topic is created by tagging an ingested entry through
    ``BibliographyStore`` before the CLI command is run.
    """
    seed_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024}
}
"""
    bib_file = tmp_path / "input.bib"
    bib_file.write_text(seed_bib, encoding="utf-8")
    assert run_cli(tmp_path, "ingest", str(bib_file)).returncode == 0

    from citegeist.storage import BibliographyStore

    db_file = tmp_path / "library.sqlite3"
    store = BibliographyStore(db_file)
    try:
        store.add_entry_topic(
            "seed2024",
            topic_slug="graph-methods",
            topic_name="Graph Methods",
            source_type="talkorigins",
            source_url="https://example.org/topics/graph-methods",
            source_label="topic-seed",
        )
        store.connection.commit()
    finally:
        store.close()

    phrase = "graph networks biology"
    set_run = run_cli(tmp_path, "set-topic-phrase", "graph-methods", phrase)
    assert set_run.returncode == 0
    topic_doc = json.loads(set_run.stdout)
    assert topic_doc["expansion_phrase"] == "graph networks biology"
|
|
|
|
|
|
def test_cli_can_apply_topic_phrases_from_json(tmp_path: Path):
    """``apply-topic-phrases`` applies phrases from a JSON file to topics.

    After tagging an ingested entry with the ``graph-methods`` topic, a JSON
    list with one ``slug``/``suggested_phrase`` pair is fed to the command.
    The test checks the command's own report and then confirms the phrase is
    visible through the ``topics`` command.
    """
    bib_path = tmp_path / "input.bib"
    bib_path.write_text(
        """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024}
}
""",
        encoding="utf-8",
    )
    ingest = run_cli(tmp_path, "ingest", str(bib_path))
    assert ingest.returncode == 0

    from citegeist.storage import BibliographyStore

    database = tmp_path / "library.sqlite3"
    store = BibliographyStore(database)
    try:
        store.add_entry_topic(
            "seed2024",
            topic_slug="graph-methods",
            topic_name="Graph Methods",
            source_type="talkorigins",
            source_url="https://example.org/topics/graph-methods",
            source_label="topic-seed",
        )
        store.connection.commit()
    finally:
        store.close()

    phrases_path = tmp_path / "phrases.json"
    phrases_path.write_text(
        json.dumps(
            [
                {
                    "slug": "graph-methods",
                    "suggested_phrase": "graph networks biology",
                }
            ]
        ),
        encoding="utf-8",
    )

    result = run_cli(tmp_path, "apply-topic-phrases", str(phrases_path))
    assert result.returncode == 0
    payload = json.loads(result.stdout)
    assert payload[0]["applied"] is True

    check = run_cli(tmp_path, "topics")
    # Fail on the exit code first so a broken follow-up command is reported
    # as such instead of as a JSON decoding error.
    assert check.returncode == 0
    topics_payload = json.loads(check.stdout)
    assert topics_payload[0]["expansion_phrase"] == "graph networks biology"
|
|
|
|
|
|
def test_cli_can_stage_topic_phrases_from_json(tmp_path: Path):
    """``stage-topic-phrases`` stages suggestions without applying them.

    After tagging an ingested entry with the ``graph-methods`` topic, a JSON
    list with one ``slug``/``suggested_phrase`` pair is fed to the command.
    The test checks the command's report and then, via ``topics``, that the
    suggestion is pending and no expansion phrase has been set.
    """
    bib_path = tmp_path / "input.bib"
    bib_path.write_text(
        """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024}
}
""",
        encoding="utf-8",
    )
    ingest = run_cli(tmp_path, "ingest", str(bib_path))
    assert ingest.returncode == 0

    from citegeist.storage import BibliographyStore

    database = tmp_path / "library.sqlite3"
    store = BibliographyStore(database)
    try:
        store.add_entry_topic(
            "seed2024",
            topic_slug="graph-methods",
            topic_name="Graph Methods",
            source_type="talkorigins",
            source_url="https://example.org/topics/graph-methods",
            source_label="topic-seed",
        )
        store.connection.commit()
    finally:
        store.close()

    phrases_path = tmp_path / "phrases.json"
    phrases_path.write_text(
        json.dumps(
            [
                {
                    "slug": "graph-methods",
                    "suggested_phrase": "graph networks biology",
                }
            ]
        ),
        encoding="utf-8",
    )

    result = run_cli(tmp_path, "stage-topic-phrases", str(phrases_path))
    assert result.returncode == 0
    payload = json.loads(result.stdout)
    assert payload[0]["staged"] is True
    assert payload[0]["phrase_review_status"] == "pending"

    check = run_cli(tmp_path, "topics")
    # Fail on the exit code first so a broken follow-up command is reported
    # as such instead of as a JSON decoding error.
    assert check.returncode == 0
    topics_payload = json.loads(check.stdout)
    assert topics_payload[0]["suggested_phrase"] == "graph networks biology"
    assert topics_payload[0]["expansion_phrase"] is None
    assert topics_payload[0]["phrase_review_status"] == "pending"
|
|
|
|
|
|
def test_cli_can_review_topic_phrase(tmp_path: Path):
    """``review-topic-phrase`` with ``accepted`` promotes a staged suggestion.

    A suggestion is staged directly through ``BibliographyStore``; the CLI
    output is then expected to show the phrase as the expansion phrase, the
    suggestion cleared, and the review status and notes recorded.
    """
    seed_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024}
}
"""
    bib_file = tmp_path / "input.bib"
    bib_file.write_text(seed_bib, encoding="utf-8")
    ingest_run = run_cli(tmp_path, "ingest", str(bib_file))
    assert ingest_run.returncode == 0

    from citegeist.storage import BibliographyStore

    store = BibliographyStore(tmp_path / "library.sqlite3")
    try:
        store.add_entry_topic(
            "seed2024",
            topic_slug="graph-methods",
            topic_name="Graph Methods",
            source_type="talkorigins",
            source_url="https://example.org/topics/graph-methods",
            source_label="topic-seed",
        )
        store.stage_topic_phrase_suggestion("graph-methods", "graph networks biology")
    finally:
        store.close()

    review_args = (
        "review-topic-phrase",
        "graph-methods",
        "accepted",
        "--notes",
        "curated and approved",
    )
    review_run = run_cli(tmp_path, *review_args)
    assert review_run.returncode == 0

    reviewed = json.loads(review_run.stdout)
    assert reviewed["suggested_phrase"] is None
    assert reviewed["expansion_phrase"] == "graph networks biology"
    assert reviewed["phrase_review_status"] == "accepted"
    assert reviewed["phrase_review_notes"] == "curated and approved"
|
|
|
|
|
|
def test_cli_topics_can_filter_by_phrase_review_status(tmp_path: Path):
    """``topics --phrase-review-status pending`` lists only pending topics.

    Two topics get staged suggestions; ``abiogenesis`` is then reviewed as
    accepted, so only ``graph-methods`` should remain in the pending listing.
    """
    seed_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024}
}
"""
    bib_file = tmp_path / "input.bib"
    bib_file.write_text(seed_bib, encoding="utf-8")
    ingest_run = run_cli(tmp_path, "ingest", str(bib_file))
    assert ingest_run.returncode == 0

    from citegeist.storage import BibliographyStore

    db_file = tmp_path / "library.sqlite3"
    store = BibliographyStore(db_file)
    try:
        store.add_entry_topic(
            "seed2024",
            topic_slug="graph-methods",
            topic_name="Graph Methods",
            source_type="talkorigins",
            source_url="https://example.org/topics/graph-methods",
            source_label="topic-seed",
        )
        store.ensure_topic("abiogenesis", "Abiogenesis")
        store.stage_topic_phrase_suggestion("graph-methods", "graph networks biology")
        store.stage_topic_phrase_suggestion("abiogenesis", "abiogenesis life origin")
        store.review_topic_phrase_suggestion("abiogenesis", "accepted")
    finally:
        store.close()

    pending_run = run_cli(tmp_path, "topics", "--phrase-review-status", "pending")
    assert pending_run.returncode == 0
    pending_topics = json.loads(pending_run.stdout)
    pending_slugs = [entry["slug"] for entry in pending_topics]
    assert pending_slugs == ["graph-methods"]
|
|
|
|
|
|
def test_cli_can_list_topic_phrase_reviews(tmp_path: Path):
    """``topic-phrase-reviews`` filtered to pending returns the staged item.

    Two topics get staged suggestions; ``abiogenesis`` is reviewed as
    accepted, so the pending listing should contain only ``graph-methods``
    with its suggested phrase and status.
    """
    seed_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024}
}
"""
    bib_file = tmp_path / "input.bib"
    bib_file.write_text(seed_bib, encoding="utf-8")
    ingest_run = run_cli(tmp_path, "ingest", str(bib_file))
    assert ingest_run.returncode == 0

    from citegeist.storage import BibliographyStore

    store = BibliographyStore(tmp_path / "library.sqlite3")
    try:
        store.add_entry_topic(
            "seed2024",
            topic_slug="graph-methods",
            topic_name="Graph Methods",
            source_type="talkorigins",
            source_url="https://example.org/topics/graph-methods",
            source_label="topic-seed",
        )
        store.ensure_topic("abiogenesis", "Abiogenesis")
        store.stage_topic_phrase_suggestion("graph-methods", "graph networks biology")
        store.stage_topic_phrase_suggestion("abiogenesis", "abiogenesis life origin")
        store.review_topic_phrase_suggestion("abiogenesis", "accepted")
    finally:
        store.close()

    listing_run = run_cli(tmp_path, "topic-phrase-reviews", "--phrase-review-status", "pending")
    assert listing_run.returncode == 0

    reviews = json.loads(listing_run.stdout)
    assert [item["slug"] for item in reviews] == ["graph-methods"]
    first_review = reviews[0]
    assert first_review["suggested_phrase"] == "graph networks biology"
    assert first_review["phrase_review_status"] == "pending"
|
|
|
|
|
|
def test_cli_can_review_topic_phrases_in_bulk(tmp_path: Path):
    """``review-topic-phrases`` applies several review decisions from JSON.

    Two staged suggestions are reviewed in one call (one accepted, one
    rejected). Follow-up listings check that nothing is left pending, that
    ``abiogenesis`` is rejected, and that ``graph-methods`` is accepted.
    """
    seed_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024}
}
"""
    bib_file = tmp_path / "input.bib"
    bib_file.write_text(seed_bib, encoding="utf-8")
    ingest_run = run_cli(tmp_path, "ingest", str(bib_file))
    assert ingest_run.returncode == 0

    from citegeist.storage import BibliographyStore

    store = BibliographyStore(tmp_path / "library.sqlite3")
    try:
        store.add_entry_topic(
            "seed2024",
            topic_slug="graph-methods",
            topic_name="Graph Methods",
            source_type="talkorigins",
            source_url="https://example.org/topics/graph-methods",
            source_label="topic-seed",
        )
        store.ensure_topic("abiogenesis", "Abiogenesis")
        store.stage_topic_phrase_suggestion("graph-methods", "graph networks biology")
        store.stage_topic_phrase_suggestion("abiogenesis", "abiogenesis life origin")
    finally:
        store.close()

    decisions = [
        {
            "slug": "graph-methods",
            "status": "accepted",
            "review_notes": "good phrase",
        },
        {
            "slug": "abiogenesis",
            "status": "rejected",
            "review_notes": "too sparse",
        },
    ]
    decisions_file = tmp_path / "phrase-review.json"
    decisions_file.write_text(json.dumps(decisions), encoding="utf-8")

    bulk_run = run_cli(tmp_path, "review-topic-phrases", str(decisions_file))
    assert bulk_run.returncode == 0
    outcomes = json.loads(bulk_run.stdout)
    assert outcomes[0]["reviewed"] is True
    assert outcomes[1]["reviewed"] is True

    # No suggestion should still be awaiting review.
    pending_run = run_cli(tmp_path, "topic-phrase-reviews", "--phrase-review-status", "pending")
    assert pending_run.returncode == 0
    assert json.loads(pending_run.stdout) == []

    rejected_run = run_cli(tmp_path, "topic-phrase-reviews", "--phrase-review-status", "rejected")
    assert rejected_run.returncode == 0
    rejected_reviews = json.loads(rejected_run.stdout)
    assert [item["slug"] for item in rejected_reviews] == ["abiogenesis"]

    accepted_run = run_cli(tmp_path, "topics", "--phrase-review-status", "accepted")
    assert accepted_run.returncode == 0
    accepted_topics = json.loads(accepted_run.stdout)
    assert [item["slug"] for item in accepted_topics] == ["graph-methods"]
|
|
|
|
|
|
def test_cli_can_export_topic_phrase_review_template(tmp_path: Path):
    """``export-topic-phrase-reviews`` writes a review template to a file.

    With one staged suggestion for ``graph-methods``, the exported JSON is
    expected to hold a single item carrying the current and suggested phrase
    values, the current status, an empty ``status`` and a ``phrase`` field.
    """
    seed_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024}
}
"""
    bib_file = tmp_path / "input.bib"
    bib_file.write_text(seed_bib, encoding="utf-8")
    ingest_run = run_cli(tmp_path, "ingest", str(bib_file))
    assert ingest_run.returncode == 0

    from citegeist.storage import BibliographyStore

    store = BibliographyStore(tmp_path / "library.sqlite3")
    try:
        store.add_entry_topic(
            "seed2024",
            topic_slug="graph-methods",
            topic_name="Graph Methods",
            source_type="talkorigins",
            source_url="https://example.org/topics/graph-methods",
            source_label="topic-seed",
        )
        store.stage_topic_phrase_suggestion("graph-methods", "graph networks biology")
    finally:
        store.close()

    template_file = tmp_path / "topic-phrase-review.json"
    export_run = run_cli(
        tmp_path,
        "export-topic-phrase-reviews",
        "--output",
        str(template_file),
    )
    assert export_run.returncode == 0

    template = json.loads(template_file.read_text(encoding="utf-8"))
    assert [row["slug"] for row in template] == ["graph-methods"]
    first_row = template[0]
    assert first_row["current_expansion_phrase"] is None
    assert first_row["suggested_phrase"] == "graph networks biology"
    assert first_row["current_status"] == "pending"
    assert first_row["status"] == ""
    assert first_row["phrase"] == "graph networks biology"
|
|
|
|
|
|
def test_cli_export_topic(tmp_path: Path):
    """``export-topic`` writes the topic's entries as BibTeX to ``--output``.

    One ingested entry is tagged with ``graph-methods`` through
    ``BibliographyStore``; the exported file must contain that entry.
    """
    seed_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024}
}
"""
    bib_file = tmp_path / "input.bib"
    bib_file.write_text(seed_bib, encoding="utf-8")
    ingest_run = run_cli(tmp_path, "ingest", str(bib_file))
    assert ingest_run.returncode == 0

    from citegeist.storage import BibliographyStore

    store = BibliographyStore(tmp_path / "library.sqlite3")
    try:
        store.add_entry_topic(
            "seed2024",
            topic_slug="graph-methods",
            topic_name="Graph Methods",
            source_type="talkorigins",
            source_url="https://example.org/topics/graph-methods",
            source_label="topic-seed",
        )
        store.connection.commit()
    finally:
        store.close()

    target = tmp_path / "graph-methods.bib"
    export_run = run_cli(tmp_path, "export-topic", "graph-methods", "--output", str(target))
    assert export_run.returncode == 0
    bibtex_text = target.read_text(encoding="utf-8")
    assert "@article{seed2024," in bibtex_text
|
|
|
|
|
|
def test_cli_export_topic_skips_stub_entries_by_default(tmp_path: Path):
    """``export-topic`` omits the stub entry unless ``--include-stubs`` is set.

    Both a title/DOI-only ``@misc`` entry and a regular article are tagged
    with the same topic; the default export should print only the article,
    while ``--include-stubs`` should also print the ``@misc`` entry.
    """
    two_entry_bib = """
@misc{stubdoi,
title = {Referenced work 6},
doi = {10.1200/JCO.2002.04.117},
url = {https://doi.org/10.1200/JCO.2002.04.117}
}

@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024}
}
"""
    bib_file = tmp_path / "input.bib"
    bib_file.write_text(two_entry_bib, encoding="utf-8")
    ingest_run = run_cli(tmp_path, "ingest", str(bib_file))
    assert ingest_run.returncode == 0

    from citegeist.storage import BibliographyStore

    store = BibliographyStore(tmp_path / "library.sqlite3")
    try:
        for key in ("stubdoi", "seed2024"):
            store.add_entry_topic(
                key,
                topic_slug="graph-methods",
                topic_name="Graph Methods",
                source_label="topic-seed",
            )
        store.connection.commit()
    finally:
        store.close()

    without_stubs = run_cli(tmp_path, "export-topic", "graph-methods")
    assert without_stubs.returncode == 0
    assert "@article{seed2024," in without_stubs.stdout
    assert "@misc{stubdoi," not in without_stubs.stdout

    with_stubs = run_cli(tmp_path, "export-topic", "graph-methods", "--include-stubs")
    assert with_stubs.returncode == 0
    assert "@misc{stubdoi," in with_stubs.stdout
|
|
|
|
|
|
def test_cli_search_can_filter_by_topic(tmp_path: Path):
    """``search --topic`` restricts results to entries in that topic.

    Two entries that both match "graph" are tagged with different topics;
    searching within ``biology`` must return only the biology entry.
    """
    two_entry_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Graph Methods for Biology},
year = {2024},
abstract = {A graph methods paper.}
}

@article{other2023,
author = {Other, Bob},
title = {Graph Methods for Chemistry},
year = {2023},
abstract = {Another graph methods paper.}
}
"""
    bib_file = tmp_path / "input.bib"
    bib_file.write_text(two_entry_bib, encoding="utf-8")
    ingest_run = run_cli(tmp_path, "ingest", str(bib_file))
    assert ingest_run.returncode == 0

    from citegeist.storage import BibliographyStore

    store = BibliographyStore(tmp_path / "library.sqlite3")
    try:
        store.add_entry_topic(
            "seed2024",
            topic_slug="biology",
            topic_name="Biology",
            source_type="talkorigins",
            source_url="https://example.org/topics/biology",
            source_label="topic-seed",
        )
        store.add_entry_topic(
            "other2023",
            topic_slug="chemistry",
            topic_name="Chemistry",
            source_type="talkorigins",
            source_url="https://example.org/topics/chemistry",
            source_label="topic-seed",
        )
        store.connection.commit()
    finally:
        store.close()

    search_run = run_cli(tmp_path, "search", "graph", "--topic", "biology")
    assert search_run.returncode == 0
    printed = search_run.stdout
    assert "seed2024" in printed
    assert "other2023" not in printed
|
|
|
|
|
|
def test_cli_graph_outputs_missing_targets(tmp_path: Path):
    """``graph --missing-only`` reports only references absent from the store.

    The seed entry references one ingested entry and one key that is not in
    the bibliography; only the latter should be listed.
    """
    graph_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024},
references = {known2023, missing2022}
}

@article{known2023,
author = {Known, Bob},
title = {Known Paper},
year = {2023}
}
"""
    bib_file = tmp_path / "graph.bib"
    bib_file.write_text(graph_bib, encoding="utf-8")

    ingest_run = run_cli(tmp_path, "ingest", str(bib_file))
    assert ingest_run.returncode == 0

    graph_run = run_cli(tmp_path, "graph", "seed2024", "--missing-only")
    assert graph_run.returncode == 0
    edges = json.loads(graph_run.stdout)
    assert len(edges) == 1
    only_edge = edges[0]
    assert only_edge["target_citation_key"] == "missing2022"
    assert only_edge["target_exists"] is False
|
|
|
|
|
|
def test_cli_graph_can_render_dot_output(tmp_path: Path):
    """``graph --format dot`` prints a DOT digraph with node and edge lines.

    Checks the digraph header, the seed node's label, and one ``cites`` edge
    each to the ingested and the missing reference.
    """
    graph_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024},
references = {known2023, missing2022}
}

@article{known2023,
author = {Known, Bob},
title = {Known Paper},
year = {2023}
}
"""
    bib_file = tmp_path / "graph.bib"
    bib_file.write_text(graph_bib, encoding="utf-8")

    ingest_run = run_cli(tmp_path, "ingest", str(bib_file))
    assert ingest_run.returncode == 0

    dot_run = run_cli(tmp_path, "graph", "seed2024", "--format", "dot")
    assert dot_run.returncode == 0

    expected_fragments = (
        "digraph citegeist {",
        '"seed2024" [label="seed2024\\\\nSeed Paper\\\\n[draft]"',
        '"seed2024" -> "known2023" [label="cites d=1"]',
        '"seed2024" -> "missing2022" [label="cites d=1"]',
    )
    for fragment in expected_fragments:
        assert fragment in dot_run.stdout
|
|
|
|
|
|
def test_cli_graph_can_write_dot_output_to_file(tmp_path: Path):
    """``graph --format dot --output`` writes DOT to a file, not to stdout."""
    graph_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024},
references = {known2023}
}

@article{known2023,
author = {Known, Bob},
title = {Known Paper},
year = {2023}
}
"""
    bib_file = tmp_path / "graph.bib"
    bib_file.write_text(graph_bib, encoding="utf-8")

    ingest_run = run_cli(tmp_path, "ingest", str(bib_file))
    assert ingest_run.returncode == 0

    dot_file = tmp_path / "graph.dot"
    dot_run = run_cli(tmp_path, "graph", "seed2024", "--format", "dot", "--output", str(dot_file))
    assert dot_run.returncode == 0
    # Nothing is expected on stdout when output goes to a file.
    assert dot_run.stdout == ""

    dot_text = dot_file.read_text(encoding="utf-8")
    assert "digraph citegeist {" in dot_text
    assert '"seed2024" -> "known2023" [label="cites d=1"]' in dot_text
|
|
|
|
|
|
def test_cli_graph_can_render_json_graph_output(tmp_path: Path):
    """``graph --format json-graph`` prints a nodes/edges JSON document.

    Checks the order of node ids, the seed flag on the seed node, the first
    edge's endpoints, and that the second edge is flagged as pointing at a
    target that does not exist.
    """
    graph_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024},
references = {known2023, missing2022}
}

@article{known2023,
author = {Known, Bob},
title = {Known Paper},
year = {2023}
}
"""
    bib_file = tmp_path / "graph.bib"
    bib_file.write_text(graph_bib, encoding="utf-8")

    ingest_run = run_cli(tmp_path, "ingest", str(bib_file))
    assert ingest_run.returncode == 0

    graph_run = run_cli(tmp_path, "graph", "seed2024", "--format", "json-graph")
    assert graph_run.returncode == 0

    graph_doc = json.loads(graph_run.stdout)
    nodes = graph_doc["nodes"]
    edges = graph_doc["edges"]
    assert [item["id"] for item in nodes] == ["known2023", "missing2022", "seed2024"]
    assert nodes[2]["is_seed"] is True
    assert edges[0]["source"] == "seed2024"
    assert edges[0]["target"] == "known2023"
    assert edges[1]["target_exists"] is False
|
|
|
|
|
|
def test_cli_graph_can_write_json_graph_output_to_file(tmp_path: Path):
    """``graph --format json-graph --output`` writes JSON to a file only."""
    graph_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024},
references = {known2023}
}

@article{known2023,
author = {Known, Bob},
title = {Known Paper},
year = {2023}
}
"""
    bib_file = tmp_path / "graph.bib"
    bib_file.write_text(graph_bib, encoding="utf-8")

    ingest_run = run_cli(tmp_path, "ingest", str(bib_file))
    assert ingest_run.returncode == 0

    json_file = tmp_path / "graph.json"
    graph_run = run_cli(
        tmp_path,
        "graph",
        "seed2024",
        "--format",
        "json-graph",
        "--output",
        str(json_file),
    )
    assert graph_run.returncode == 0
    # Nothing is expected on stdout when output goes to a file.
    assert graph_run.stdout == ""

    graph_doc = json.loads(json_file.read_text(encoding="utf-8"))
    edge_targets = [item["target"] for item in graph_doc["edges"]]
    assert edge_targets == ["known2023"]
|
|
|
|
|
|
def test_cli_graph_view_renders_html_from_json_graph(tmp_path: Path):
    """``graph-view`` turns a json-graph file into an HTML page on disk.

    A two-node, one-edge graph document is written by hand; the rendered
    HTML must carry the doctype, the requested title, and both node ids,
    while stdout stays empty.
    """
    seed_node = {
        "id": "seed2024",
        "label": "seed2024",
        "title": "Seed Paper",
        "review_status": "draft",
        "target_exists": True,
        "is_seed": True,
    }
    known_node = {
        "id": "known2023",
        "label": "known2023",
        "title": "Known Paper",
        "review_status": "reviewed",
        "target_exists": True,
        "is_seed": False,
    }
    cites_edge = {
        "id": "edge-1",
        "source": "seed2024",
        "target": "known2023",
        "relation_type": "cites",
        "depth": 1,
        "target_exists": True,
    }
    graph_doc = {"nodes": [seed_node, known_node], "edges": [cites_edge]}

    graph_file = tmp_path / "graph.json"
    graph_file.write_text(json.dumps(graph_doc), encoding="utf-8")

    html_file = tmp_path / "graph.html"
    view_run = run_cli(
        tmp_path,
        "graph-view",
        str(graph_file),
        "--output",
        str(html_file),
        "--title",
        "Graph Demo",
    )
    assert view_run.returncode == 0
    assert view_run.stdout == ""

    page = html_file.read_text(encoding="utf-8")
    for fragment in ("<!DOCTYPE html>", "<title>Graph Demo</title>", '"seed2024"', '"known2023"'):
        assert fragment in page
|
|
|
|
|
|
def test_cli_expand_with_mocked_crossref(tmp_path: Path):
    """``expand`` exits 0 when the Crossref expander is patched.

    ``CrossrefExpander.expand_entry_references`` is replaced with a mock
    returning one canned ``ExpansionResult`` for the ingested seed entry.
    """
    seed_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024},
doi = {10.1000/seed-doi}
}
"""
    bib_file = tmp_path / "expand.bib"
    bib_file.write_text(seed_bib, encoding="utf-8")
    ingest_run = run_cli(tmp_path, "ingest", str(bib_file))
    assert ingest_run.returncode == 0

    from citegeist.expand import ExpansionResult

    canned_result = ExpansionResult(
        source_citation_key="seed2024",
        discovered_citation_key="doi101000exampleref",
        created_entry=True,
        relation_type="cites",
        source_label="crossref:references:10.1000/seed-doi",
    )
    db_file = tmp_path / "library.sqlite3"

    with patch("citegeist.cli.CrossrefExpander.expand_entry_references") as mocked_expand:
        mocked_expand.return_value = [canned_result]
        exit_code = main(["--db", str(db_file), "expand", "seed2024"])

    assert exit_code == 0
|
|
|
|
|
|
def test_cli_expand_with_mocked_openalex(tmp_path: Path):
    """``expand --source openalex --relation cites`` exits 0 with a mock.

    ``OpenAlexExpander.expand_entry`` is replaced with a mock returning one
    canned ``ExpansionResult`` for the ingested seed entry.
    """
    seed_bib = """
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024},
doi = {10.1000/seed-doi}
}
"""
    bib_file = tmp_path / "expand-openalex.bib"
    bib_file.write_text(seed_bib, encoding="utf-8")
    ingest_run = run_cli(tmp_path, "ingest", str(bib_file))
    assert ingest_run.returncode == 0

    from citegeist.expand import ExpansionResult

    canned_result = ExpansionResult(
        source_citation_key="seed2024",
        discovered_citation_key="openalexw12345",
        created_entry=True,
        relation_type="cites",
        source_label="openalex:cites:WSEED",
    )
    db_file = tmp_path / "library.sqlite3"
    argv = ["--db", str(db_file), "expand", "seed2024", "--source", "openalex", "--relation", "cites"]

    with patch("citegeist.cli.OpenAlexExpander.expand_entry") as mocked_expand:
        mocked_expand.return_value = [canned_result]
        exit_code = main(argv)

    assert exit_code == 0
|
|
|
|
|
|
def test_cli_expand_topic_with_mocked_expander(tmp_path: Path):
|
|
from citegeist.expand import TopicExpansionResult
|
|
|
|
with patch("citegeist.cli.TopicExpander.expand_topic") as mocked_expand:
|
|
mocked_expand.return_value = [
|
|
TopicExpansionResult(
|
|
topic_slug="abiogenesis",
|
|
source_citation_key="seed2024",
|
|
discovered_citation_key="discovered1",
|
|
discovered_title="Abiogenesis origin chemistry",
|
|
created_entry=True,
|
|
relation_type="cites",
|
|
source_label="openalex:cites:seed2024",
|
|
relevance_score=0.67,
|
|
meets_relevance_threshold=True,
|
|
assigned_to_topic=True,
|
|
)
|
|
]
|
|
database = tmp_path / "library.sqlite3"
|
|
exit_code = main(
|
|
[
|
|
"--db",
|
|
str(database),
|
|
"expand-topic",
|
|
"abiogenesis",
|
|
"--topic-phrase",
|
|
"abiogenesis origin chemistry",
|
|
"--seed-key",
|
|
"seed2024",
|
|
"--min-relevance",
|
|
"0.3",
|
|
"--preview",
|
|
]
|
|
)
|
|
|
|
assert exit_code == 0
|
|
_, kwargs = mocked_expand.call_args
|
|
assert kwargs["preview_only"] is True
|