From 89bc56a7aa48fc5310f532281a6f36d8074cc8f0 Mon Sep 17 00:00:00 2001 From: welsberr Date: Tue, 28 Apr 2026 00:29:45 -0400 Subject: [PATCH] Add Notebook bibliography bundle export --- docs/README.md | 1 + src/citegeist/cli.py | 25 +++++++++++++++ src/citegeist/notebook_export.py | 52 ++++++++++++++++++++++++++++++++ tests/test_cli.py | 44 ++++++++++++++++++++++++++- 4 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 src/citegeist/notebook_export.py diff --git a/docs/README.md b/docs/README.md index b536eda..f8b24bd 100644 --- a/docs/README.md +++ b/docs/README.md @@ -31,6 +31,7 @@ This documentation therefore emphasizes: 1. Crossref, OpenAlex, PubMed, Europe PMC, Semantic Scholar, DataCite, DBLP, arXiv, and OAI-PMH are already in play. 2. OpenCitations and Unpaywall are now integrated as source-layer additions. 3. The SQLite-based local workflow remains the baseline. +4. Notebook-ready topic bibliography bundles can now be exported with `export-notebook-topic` for downstream `Didactopus`/Notebook use. ### Recommended Next Sources 1. 
OpenAIRE only if repository-acquisition scope expands
diff --git a/src/citegeist/cli.py b/src/citegeist/cli.py
index b280f6f..6514ba9 100644
--- a/src/citegeist/cli.py
+++ b/src/citegeist/cli.py
@@ -12,6 +12,7 @@ from .bibtex import BibEntry, parse_bibtex, render_bibtex
 from .bootstrap import Bootstrapper
 from .examples.talkorigins import TalkOriginsScraper
 from .expand import CrossrefExpander, OpenAlexExpander, TopicExpander, _expand_relation_types
+from .notebook_export import TopicNotFoundError, export_notebook_topic_bundle
 from .extract import (
     available_extraction_backends,
     check_extraction_comparison_summary,
@@ -693,6 +694,18 @@ def build_parser() -> argparse.ArgumentParser:
         help="Include DOI-only placeholder records in the topic export",
     )
 
+    export_notebook_topic_parser = subparsers.add_parser(
+        "export-notebook-topic",
+        help="Export a Notebook-ready bibliography bundle for one topic",
+    )
+    export_notebook_topic_parser.add_argument("topic_slug", help="Topic slug to export")
+    export_notebook_topic_parser.add_argument("--output-dir", required=True, help="Directory to write the Notebook bundle")
+    export_notebook_topic_parser.add_argument(
+        "--include-stubs",
+        action="store_true",
+        help="Include DOI-only placeholder records in the Notebook bibliography",
+    )
+
     return parser
 
 
@@ -912,6 +925,8 @@ def main(argv: list[str] | None = None) -> int:
             return _run_topic_entries(store, args.topic_slug, args.limit)
         if args.command == "export-topic":
             return _run_export_topic(store, args.topic_slug, args.output, args.include_stubs)
+        if args.command == "export-notebook-topic":
+            return _run_export_notebook_topic(store, args.topic_slug, args.output_dir, args.include_stubs)
     finally:
         store.close()
 
@@ -2335,3 +2350,15 @@ def _run_export_topic(store: BibliographyStore, topic_slug: str, output: str | N
     if rendered:
         print(rendered)
     return 0
+
+
+def _run_export_notebook_topic(store: BibliographyStore, topic_slug: str, output_dir: str, include_stubs: bool) -> int:
+    """Export a Notebook bundle for ``topic_slug``, print the result as JSON, and return the exit code."""
+    try:
+        payload = export_notebook_topic_bundle(store.path, topic_slug, output_dir, include_stubs=include_stubs)
+    except TopicNotFoundError:
+        # Only a missing topic is reported here; any other KeyError is a bug and should surface.
+        print(f"Topic not found: {topic_slug}", file=sys.stderr)
+        return 1
+    print(json.dumps(payload, indent=2))
+    return 0
diff --git a/src/citegeist/notebook_export.py b/src/citegeist/notebook_export.py
new file mode 100644
index 0000000..8e9ff32
--- /dev/null
+++ b/src/citegeist/notebook_export.py
@@ -0,0 +1,85 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+from .storage import BibliographyStore
+
+# Passed as ``limit`` to ``list_topic_entries``. NOTE(review): presumably a row cap,
+# in which case topics with more entries are silently truncated - confirm.
+_MAX_TOPIC_ENTRIES = 100000
+
+
+class TopicNotFoundError(KeyError):
+    """Raised when the requested topic slug is not present in the store.
+
+    Subclasses ``KeyError`` so existing ``except KeyError`` handlers keep working.
+    """
+
+
+def export_notebook_topic_bundle(
+    store_dir: str | Path,
+    topic_slug: str,
+    out_dir: str | Path,
+    *,
+    include_stubs: bool = False,
+) -> dict[str, Any]:
+    """Write a Notebook bibliography bundle for one topic and describe it.
+
+    Two files are written into ``out_dir`` (created if missing):
+    ``notebook_topic_bibliography.bib`` holding the exported BibTeX and
+    ``notebook_topic_bundle.json`` holding the bundle manifest.
+
+    Args:
+        store_dir: Location handed to ``BibliographyStore``.
+        topic_slug: Slug of the topic to export.
+        out_dir: Directory that receives the two bundle files.
+        include_stubs: Forwarded to ``export_bibtex_report``.
+
+    Returns:
+        A dict with ``bundle_path``, ``bibliography_path`` and the ``bundle``
+        manifest that was written to disk.
+
+    Raises:
+        TopicNotFoundError: If ``get_topic`` returns ``None`` for ``topic_slug``.
+    """
+    store = BibliographyStore(store_dir)
+    try:
+        topic = store.get_topic(topic_slug)
+        if topic is None:
+            raise TopicNotFoundError(f"Topic not found: {topic_slug}")
+        entries = store.list_topic_entries(topic_slug, limit=_MAX_TOPIC_ENTRIES)
+        citation_keys = [row["citation_key"] for row in entries]
+        bibtex_report = store.export_bibtex_report(citation_keys, include_stubs=include_stubs)
+    finally:
+        store.close()
+
+    target = Path(out_dir)
+    target.mkdir(parents=True, exist_ok=True)
+
+    bibliography_path = target / "notebook_topic_bibliography.bib"
+    bibliography_text = bibtex_report["bibtex"]
+    # End non-empty output with a newline without doubling one that is already there.
+    if bibliography_text and not bibliography_text.endswith("\n"):
+        bibliography_text += "\n"
+    bibliography_path.write_text(bibliography_text, encoding="utf-8")
+
+    bundle = {
+        "bundle_kind": "notebook_topic_bibliography_bundle",
+        "topic": topic,
+        "entry_count": len(entries),
+        "exported_count": bibtex_report["exported_count"],
+        "include_stubs": include_stubs,
+        "skipped": bibtex_report["skipped"],
+        "citation_keys": citation_keys,
+        "bibliography_path": str(bibliography_path),
+    }
+    bundle_path = target / "notebook_topic_bundle.json"
+    bundle_path.write_text(json.dumps(bundle, indent=2), encoding="utf-8")
+
+    return {
+        "bundle_path": str(bundle_path),
+        "bibliography_path": str(bibliography_path),
+        "bundle": bundle,
+    }
diff 
--git a/tests/test_cli.py b/tests/test_cli.py
index 66b6ad9..4f8e6d8 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -41,9 +41,12 @@ SAMPLE_BIB = """
 
 def run_cli(tmp_path: Path, *args: str) -> subprocess.CompletedProcess[str]:
     database = tmp_path / "library.sqlite3"
+    python = Path(__file__).resolve().parents[1] / ".venv/bin/python"
+    if not python.exists():
+        python = Path(sys.executable)
     env = {"PYTHONPATH": "src"}
     return subprocess.run(
-        [sys.executable, "-m", "citegeist", "--db", str(database), *args],
+        [str(python), "-m", "citegeist", "--db", str(database), *args],
         cwd=Path(__file__).resolve().parents[1],
         env=env,
         capture_output=True,
@@ -1681,6 +1684,58 @@ def test_cli_export_topic(tmp_path: Path):
     assert "@article{seed2024," in exported
 
 
+def test_cli_export_notebook_topic_bundle(tmp_path: Path):
+    """Export one seeded topic and check the stdout payload against the files on disk."""
+    bib_path = tmp_path / "input.bib"
+    bib_path.write_text(
+        """
+@article{seed2024,
+  author = {Seed, Alice},
+  title = {Graph Topic Result},
+  year = {2024}
+}
+""",
+        encoding="utf-8",
+    )
+    assert run_cli(tmp_path, "ingest", str(bib_path)).returncode == 0
+
+    from citegeist.storage import BibliographyStore
+
+    database = tmp_path / "library.sqlite3"
+    store = BibliographyStore(database)
+    try:
+        store.add_entry_topic(
+            "seed2024",
+            topic_slug="graph-topic",
+            topic_name="Graph Topic",
+            source_label="seed",
+        )
+        store.connection.commit()
+    finally:
+        store.close()
+
+    output_dir = tmp_path / "notebook-export"
+    bundle_path = output_dir / "notebook_topic_bundle.json"
+    bibliography_path = output_dir / "notebook_topic_bibliography.bib"
+    result = run_cli(tmp_path, "export-notebook-topic", "graph-topic", "--output-dir", str(output_dir))
+    assert result.returncode == 0
+    payload = json.loads(result.stdout)
+    assert payload["bundle"]["bundle_kind"] == "notebook_topic_bibliography_bundle"
+    assert payload["bundle"]["citation_keys"] == ["seed2024"]
+    assert bundle_path.exists()
+    assert bibliography_path.exists()
+    # The manifest on disk must match the bundle that was printed to stdout.
+    assert json.loads(bundle_path.read_text(encoding="utf-8")) == payload["bundle"]
+    assert "@article{seed2024," in bibliography_path.read_text(encoding="utf-8")
+
+
+def test_cli_export_notebook_topic_missing_topic(tmp_path: Path):
+    """An unknown topic slug exits with status 1 and names the slug on stderr."""
+    result = run_cli(tmp_path, "export-notebook-topic", "no-such-topic", "--output-dir", str(tmp_path / "out"))
+    assert result.returncode == 1
+    assert "Topic not found: no-such-topic" in result.stderr
+
+
 def 
test_cli_export_topic_skips_stub_entries_by_default(tmp_path: Path): bib_path = tmp_path / "input.bib" bib_path.write_text(