Add Notebook bibliography bundle export
This commit is contained in:
parent
0497e18f04
commit
89bc56a7aa
|
|
@ -31,6 +31,7 @@ This documentation therefore emphasizes:
|
|||
1. Crossref, OpenAlex, PubMed, Europe PMC, Semantic Scholar, DataCite, DBLP, arXiv, and OAI-PMH are already in play.
|
||||
2. OpenCitations and Unpaywall are now integrated as source-layer additions.
|
||||
3. The SQLite-based local workflow remains the baseline.
|
||||
4. Notebook-ready topic bibliography bundles can now be exported with `export-notebook-topic` for downstream `Didactopus`/Notebook use.
|
||||
|
||||
### Recommended Next Sources
|
||||
1. OpenAIRE only if repository-acquisition scope expands
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ from .bibtex import BibEntry, parse_bibtex, render_bibtex
|
|||
from .bootstrap import Bootstrapper
|
||||
from .examples.talkorigins import TalkOriginsScraper
|
||||
from .expand import CrossrefExpander, OpenAlexExpander, TopicExpander, _expand_relation_types
|
||||
from .notebook_export import export_notebook_topic_bundle
|
||||
from .extract import (
|
||||
available_extraction_backends,
|
||||
check_extraction_comparison_summary,
|
||||
|
|
@ -693,6 +694,18 @@ def build_parser() -> argparse.ArgumentParser:
|
|||
help="Include DOI-only placeholder records in the topic export",
|
||||
)
|
||||
|
||||
export_notebook_topic_parser = subparsers.add_parser(
|
||||
"export-notebook-topic",
|
||||
help="Export a Notebook-ready bibliography bundle for one topic",
|
||||
)
|
||||
export_notebook_topic_parser.add_argument("topic_slug", help="Topic slug to export")
|
||||
export_notebook_topic_parser.add_argument("--output-dir", required=True, help="Directory to write the Notebook bundle")
|
||||
export_notebook_topic_parser.add_argument(
|
||||
"--include-stubs",
|
||||
action="store_true",
|
||||
help="Include DOI-only placeholder records in the Notebook bibliography",
|
||||
)
|
||||
|
||||
return parser
|
||||
|
||||
|
||||
|
|
@ -912,6 +925,8 @@ def main(argv: list[str] | None = None) -> int:
|
|||
return _run_topic_entries(store, args.topic_slug, args.limit)
|
||||
if args.command == "export-topic":
|
||||
return _run_export_topic(store, args.topic_slug, args.output, args.include_stubs)
|
||||
if args.command == "export-notebook-topic":
|
||||
return _run_export_notebook_topic(store, args.topic_slug, args.output_dir, args.include_stubs)
|
||||
finally:
|
||||
store.close()
|
||||
|
||||
|
|
@ -2335,3 +2350,13 @@ def _run_export_topic(store: BibliographyStore, topic_slug: str, output: str | N
|
|||
if rendered:
|
||||
print(rendered)
|
||||
return 0
|
||||
|
||||
|
||||
def _run_export_notebook_topic(store: BibliographyStore, topic_slug: str, output_dir: str, include_stubs: bool) -> int:
    """Handle the ``export-notebook-topic`` CLI command.

    Delegates to ``export_notebook_topic_bundle`` using the path of the
    already-open store, then prints the returned payload as indented JSON
    on stdout.

    Returns:
        0 on success; 1 if the export raised ``KeyError``, in which case a
        "Topic not found" message is written to stderr instead.
    """
    try:
        result = export_notebook_topic_bundle(
            store.path,
            topic_slug,
            output_dir,
            include_stubs=include_stubs,
        )
    except KeyError:
        # The exporter signals an unknown topic slug with KeyError.
        print(f"Topic not found: {topic_slug}", file=sys.stderr)
        return 1
    else:
        rendered = json.dumps(result, indent=2)
        print(rendered)
        return 0
|
||||
|
|
|
|||
|
|
@ -0,0 +1,52 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .storage import BibliographyStore
|
||||
|
||||
|
||||
def export_notebook_topic_bundle(
    store_dir: str | Path,
    topic_slug: str,
    out_dir: str | Path,
    *,
    include_stubs: bool = False,
) -> dict[str, Any]:
    """Write a Notebook bibliography bundle for one topic into ``out_dir``.

    Two files are produced inside ``out_dir`` (created if missing):

    * ``notebook_topic_bibliography.bib`` - the BibTeX text reported by the
      store for the topic's citation keys.
    * ``notebook_topic_bundle.json`` - a JSON manifest describing the export.

    Args:
        store_dir: Value passed straight to ``BibliographyStore``.
        topic_slug: Slug of the topic to export.
        out_dir: Directory that receives the two bundle files.
        include_stubs: Forwarded to ``export_bibtex_report``.

    Returns:
        A dict with ``bundle_path``, ``bibliography_path`` and the ``bundle``
        manifest itself.

    Raises:
        KeyError: If the store has no topic for ``topic_slug``.
    """
    library = BibliographyStore(store_dir)
    try:
        topic = library.get_topic(topic_slug)
        if topic is None:
            raise KeyError(f"Topic not found: {topic_slug}")
        entries = library.list_topic_entries(topic_slug, limit=100000)
        citation_keys = []
        for row in entries:
            citation_keys.append(row["citation_key"])
        report = library.export_bibtex_report(citation_keys, include_stubs=include_stubs)
    finally:
        # Release the store before any filesystem output is attempted.
        library.close()

    destination = Path(out_dir)
    destination.mkdir(parents=True, exist_ok=True)

    bibliography_path = destination / "notebook_topic_bibliography.bib"
    bibtex = report["bibtex"]
    # Terminate non-empty output with a newline; leave an empty export empty.
    trailer = "\n" if bibtex else ""
    bibliography_path.write_text(bibtex + trailer, encoding="utf-8")

    bundle = {
        "bundle_kind": "notebook_topic_bibliography_bundle",
        "topic": topic,
        "entry_count": len(entries),
        "exported_count": report["exported_count"],
        "include_stubs": include_stubs,
        "skipped": report["skipped"],
        "citation_keys": citation_keys,
        "bibliography_path": str(bibliography_path),
    }
    bundle_path = destination / "notebook_topic_bundle.json"
    manifest_text = json.dumps(bundle, indent=2)
    bundle_path.write_text(manifest_text, encoding="utf-8")

    return {
        "bundle_path": str(bundle_path),
        "bibliography_path": str(bibliography_path),
        "bundle": bundle,
    }
|
||||
|
|
@ -41,9 +41,12 @@ SAMPLE_BIB = """
|
|||
|
||||
def run_cli(tmp_path: Path, *args: str) -> subprocess.CompletedProcess[str]:
|
||||
database = tmp_path / "library.sqlite3"
|
||||
python = Path(__file__).resolve().parents[1] / ".venv/bin/python"
|
||||
if not python.exists():
|
||||
python = Path(sys.executable)
|
||||
env = {"PYTHONPATH": "src"}
|
||||
return subprocess.run(
|
||||
[sys.executable, "-m", "citegeist", "--db", str(database), *args],
|
||||
[str(python), "-m", "citegeist", "--db", str(database), *args],
|
||||
cwd=Path(__file__).resolve().parents[1],
|
||||
env=env,
|
||||
capture_output=True,
|
||||
|
|
@ -1681,6 +1684,45 @@ def test_cli_export_topic(tmp_path: Path):
|
|||
assert "@article{seed2024," in exported
|
||||
|
||||
|
||||
def test_cli_export_notebook_topic_bundle(tmp_path: Path):
    """The ``export-notebook-topic`` command writes both bundle files.

    Ingests one BibTeX entry, attaches it to a topic directly through the
    store, runs the CLI export, and checks the JSON payload on stdout plus
    the two files written to the output directory.
    """
    source_bib = tmp_path / "input.bib"
    source_bib.write_text(
        """
@article{seed2024,
author = {Seed, Alice},
title = {Graph Topic Result},
year = {2024}
}
""",
        encoding="utf-8",
    )
    ingest = run_cli(tmp_path, "ingest", str(source_bib))
    assert ingest.returncode == 0

    from citegeist.storage import BibliographyStore

    # Same database file that run_cli targets via --db.
    database = tmp_path / "library.sqlite3"
    library = BibliographyStore(database)
    try:
        library.add_entry_topic(
            "seed2024",
            topic_slug="graph-topic",
            topic_name="Graph Topic",
            source_label="seed",
        )
        library.connection.commit()
    finally:
        library.close()

    output_dir = tmp_path / "notebook-export"
    export = run_cli(tmp_path, "export-notebook-topic", "graph-topic", "--output-dir", str(output_dir))
    assert export.returncode == 0

    payload = json.loads(export.stdout)
    assert payload["bundle"]["bundle_kind"] == "notebook_topic_bibliography_bundle"

    manifest_file = output_dir / "notebook_topic_bundle.json"
    bibliography_file = output_dir / "notebook_topic_bibliography.bib"
    assert manifest_file.exists()
    assert bibliography_file.exists()
    assert "@article{seed2024," in bibliography_file.read_text(encoding="utf-8")
|
||||
|
||||
|
||||
def test_cli_export_topic_skips_stub_entries_by_default(tmp_path: Path):
|
||||
bib_path = tmp_path / "input.bib"
|
||||
bib_path.write_text(
|
||||
|
|
|
|||
Loading…
Reference in New Issue