Add Notebook bibliography bundle export

This commit is contained in:
welsberr 2026-04-28 00:29:45 -04:00
parent 0497e18f04
commit 89bc56a7aa
4 changed files with 121 additions and 1 deletions

View File

@ -31,6 +31,7 @@ This documentation therefore emphasizes:
1. Crossref, OpenAlex, PubMed, Europe PMC, Semantic Scholar, DataCite, DBLP, arXiv, and OAI-PMH are already in play. 1. Crossref, OpenAlex, PubMed, Europe PMC, Semantic Scholar, DataCite, DBLP, arXiv, and OAI-PMH are already in play.
2. OpenCitations and Unpaywall are now integrated as source-layer additions. 2. OpenCitations and Unpaywall are now integrated as source-layer additions.
3. The SQLite-based local workflow remains the baseline. 3. The SQLite-based local workflow remains the baseline.
4. Notebook-ready topic bibliography bundles can now be exported with `export-notebook-topic` for downstream `Didactopus`/Notebook use.
### Recommended Next Sources ### Recommended Next Sources
1. OpenAIRE only if repository-acquisition scope expands 1. OpenAIRE only if repository-acquisition scope expands

View File

@ -12,6 +12,7 @@ from .bibtex import BibEntry, parse_bibtex, render_bibtex
from .bootstrap import Bootstrapper from .bootstrap import Bootstrapper
from .examples.talkorigins import TalkOriginsScraper from .examples.talkorigins import TalkOriginsScraper
from .expand import CrossrefExpander, OpenAlexExpander, TopicExpander, _expand_relation_types from .expand import CrossrefExpander, OpenAlexExpander, TopicExpander, _expand_relation_types
from .notebook_export import export_notebook_topic_bundle
from .extract import ( from .extract import (
available_extraction_backends, available_extraction_backends,
check_extraction_comparison_summary, check_extraction_comparison_summary,
@ -693,6 +694,18 @@ def build_parser() -> argparse.ArgumentParser:
help="Include DOI-only placeholder records in the topic export", help="Include DOI-only placeholder records in the topic export",
) )
export_notebook_topic_parser = subparsers.add_parser(
"export-notebook-topic",
help="Export a Notebook-ready bibliography bundle for one topic",
)
export_notebook_topic_parser.add_argument("topic_slug", help="Topic slug to export")
export_notebook_topic_parser.add_argument("--output-dir", required=True, help="Directory to write the Notebook bundle")
export_notebook_topic_parser.add_argument(
"--include-stubs",
action="store_true",
help="Include DOI-only placeholder records in the Notebook bibliography",
)
return parser return parser
@ -912,6 +925,8 @@ def main(argv: list[str] | None = None) -> int:
return _run_topic_entries(store, args.topic_slug, args.limit) return _run_topic_entries(store, args.topic_slug, args.limit)
if args.command == "export-topic": if args.command == "export-topic":
return _run_export_topic(store, args.topic_slug, args.output, args.include_stubs) return _run_export_topic(store, args.topic_slug, args.output, args.include_stubs)
if args.command == "export-notebook-topic":
return _run_export_notebook_topic(store, args.topic_slug, args.output_dir, args.include_stubs)
finally: finally:
store.close() store.close()
@ -2335,3 +2350,13 @@ def _run_export_topic(store: BibliographyStore, topic_slug: str, output: str | N
if rendered: if rendered:
print(rendered) print(rendered)
return 0 return 0
def _run_export_notebook_topic(
    store: BibliographyStore,
    topic_slug: str,
    output_dir: str,
    include_stubs: bool,
) -> int:
    """Handle the ``export-notebook-topic`` CLI command.

    Calls ``export_notebook_topic_bundle`` with ``store.path``, the topic
    slug, and the output directory, then prints the returned payload to
    stdout as indented JSON.

    Args:
        store: Open bibliography store; only its ``path`` attribute is read.
        topic_slug: Slug of the topic to export.
        output_dir: Directory the bundle files are written into.
        include_stubs: Forwarded to the exporter as ``include_stubs``.

    Returns:
        ``0`` on success, or ``1`` (after writing a "Topic not found"
        message to stderr) when the exporter raises ``KeyError``.
    """
    try:
        result = export_notebook_topic_bundle(
            store.path,
            topic_slug,
            output_dir,
            include_stubs=include_stubs,
        )
    except KeyError:
        # The exporter signals an unknown topic slug with KeyError.
        print(f"Topic not found: {topic_slug}", file=sys.stderr)
        return 1
    else:
        rendered = json.dumps(result, indent=2)
        print(rendered)
    return 0

View File

@ -0,0 +1,52 @@
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
from .storage import BibliographyStore
def export_notebook_topic_bundle(
    store_dir: str | Path,
    topic_slug: str,
    out_dir: str | Path,
    *,
    include_stubs: bool = False,
    entry_limit: int = 100000,
) -> dict[str, Any]:
    """Export a Notebook-ready bibliography bundle for a single topic.

    Two files are written into ``out_dir`` (created if missing):

    * ``notebook_topic_bibliography.bib`` - the BibTeX text produced by
      ``BibliographyStore.export_bibtex_report`` for the topic's entries,
      followed by a newline when the text is non-empty.
    * ``notebook_topic_bundle.json`` - a JSON manifest describing the
      export (topic, counts, skipped entries, citation keys, and the
      bibliography file path).

    Args:
        store_dir: Location passed straight to ``BibliographyStore``.
        topic_slug: Slug of the topic to export.
        out_dir: Directory that receives the bundle files.
        include_stubs: Forwarded to ``export_bibtex_report`` as
            ``include_stubs``.
        entry_limit: Maximum number of topic entries requested from
            ``list_topic_entries``. Defaults to 100000, the cap that was
            previously hard-coded; raise it for larger topics so the
            export is not silently truncated.

    Returns:
        A dict with ``bundle_path`` and ``bibliography_path`` (both as
        strings) and ``bundle`` (the manifest that was written to disk).

    Raises:
        KeyError: If ``get_topic`` returns ``None`` for ``topic_slug``.
    """
    store = BibliographyStore(store_dir)
    try:
        topic = store.get_topic(topic_slug)
        if topic is None:
            raise KeyError(f"Topic not found: {topic_slug}")
        entries = store.list_topic_entries(topic_slug, limit=entry_limit)
        citation_keys = [row["citation_key"] for row in entries]
        bibtex_report = store.export_bibtex_report(citation_keys, include_stubs=include_stubs)
    finally:
        # All store reads are finished; release it before touching the filesystem.
        store.close()

    target = Path(out_dir)
    target.mkdir(parents=True, exist_ok=True)

    bibliography_path = target / "notebook_topic_bibliography.bib"
    bibliography_text = bibtex_report["bibtex"]
    # Terminate non-empty output with a newline; an empty export yields an empty file.
    bibliography_path.write_text(bibliography_text + ("\n" if bibliography_text else ""), encoding="utf-8")

    bundle = {
        "bundle_kind": "notebook_topic_bibliography_bundle",
        "topic": topic,
        "entry_count": len(entries),
        "exported_count": bibtex_report["exported_count"],
        "include_stubs": include_stubs,
        "skipped": bibtex_report["skipped"],
        "citation_keys": citation_keys,
        "bibliography_path": str(bibliography_path),
    }
    bundle_path = target / "notebook_topic_bundle.json"
    bundle_path.write_text(json.dumps(bundle, indent=2), encoding="utf-8")

    return {
        "bundle_path": str(bundle_path),
        "bibliography_path": str(bibliography_path),
        "bundle": bundle,
    }

View File

@ -41,9 +41,12 @@ SAMPLE_BIB = """
def run_cli(tmp_path: Path, *args: str) -> subprocess.CompletedProcess[str]: def run_cli(tmp_path: Path, *args: str) -> subprocess.CompletedProcess[str]:
database = tmp_path / "library.sqlite3" database = tmp_path / "library.sqlite3"
python = Path(__file__).resolve().parents[1] / ".venv/bin/python"
if not python.exists():
python = Path(sys.executable)
env = {"PYTHONPATH": "src"} env = {"PYTHONPATH": "src"}
return subprocess.run( return subprocess.run(
[sys.executable, "-m", "citegeist", "--db", str(database), *args], [str(python), "-m", "citegeist", "--db", str(database), *args],
cwd=Path(__file__).resolve().parents[1], cwd=Path(__file__).resolve().parents[1],
env=env, env=env,
capture_output=True, capture_output=True,
@ -1681,6 +1684,45 @@ def test_cli_export_topic(tmp_path: Path):
assert "@article{seed2024," in exported assert "@article{seed2024," in exported
def test_cli_export_notebook_topic_bundle(tmp_path: Path):
    """Check that ``export-notebook-topic`` writes both bundle files.

    Ingests one BibTeX entry through the CLI, attaches it to the
    ``graph-topic`` topic directly via ``BibliographyStore``, runs the
    export command, and verifies the JSON payload on stdout and the files
    created in the output directory.
    """
    from citegeist.storage import BibliographyStore

    source_bib = tmp_path / "input.bib"
    source_bib.write_text(
        """
@article{seed2024,
author = {Seed, Alice},
title = {Graph Topic Result},
year = {2024}
}
""",
        encoding="utf-8",
    )
    ingest = run_cli(tmp_path, "ingest", str(source_bib))
    assert ingest.returncode == 0

    # Same database file that run_cli passes to the CLI via --db.
    db_file = tmp_path / "library.sqlite3"
    library = BibliographyStore(db_file)
    try:
        library.add_entry_topic(
            "seed2024",
            topic_slug="graph-topic",
            topic_name="Graph Topic",
            source_label="seed",
        )
        library.connection.commit()
    finally:
        library.close()

    export_dir = tmp_path / "notebook-export"
    exported = run_cli(
        tmp_path,
        "export-notebook-topic",
        "graph-topic",
        "--output-dir",
        str(export_dir),
    )
    assert exported.returncode == 0

    report = json.loads(exported.stdout)
    assert report["bundle"]["bundle_kind"] == "notebook_topic_bibliography_bundle"

    manifest_file = export_dir / "notebook_topic_bundle.json"
    bib_file = export_dir / "notebook_topic_bibliography.bib"
    assert manifest_file.exists()
    assert bib_file.exists()
    assert "@article{seed2024," in bib_file.read_text(encoding="utf-8")
def test_cli_export_topic_skips_stub_entries_by_default(tmp_path: Path): def test_cli_export_topic_skips_stub_entries_by_default(tmp_path: Path):
bib_path = tmp_path / "input.bib" bib_path = tmp_path / "input.bib"
bib_path.write_text( bib_path.write_text(