Skip stub entries in default exports
This commit is contained in:
parent
912dc59301
commit
0144bd9ef4
|
|
@ -154,6 +154,8 @@ PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 export --outpu
|
|||
|
||||
For a fuller option-by-option CLI cookbook, see [examples/cli/README.md](./examples/cli/README.md).
|
||||
|
||||
Broad BibTeX exports skip DOI-only placeholder records such as `Referenced work N` by default. Use `--include-stubs` on `export` or `export-topic` if you want those entries included anyway.
|
||||
|
||||
For live-source development, prefer fixture-backed or cache-backed source clients so resolver and expansion work can be exercised repeatedly without re-hitting upstream APIs on every run.
|
||||
|
||||
## Example Application
|
||||
|
|
|
|||
|
|
@ -129,6 +129,12 @@ Write BibTeX to a file:
|
|||
.venv/bin/python -m citegeist --db library.sqlite3 export --output artificial-life.bib
|
||||
```
|
||||
|
||||
Include DOI-only placeholder records in a broad export:
|
||||
|
||||
```bash
|
||||
.venv/bin/python -m citegeist --db library.sqlite3 export --include-stubs --output artificial-life.bib
|
||||
```
|
||||
|
||||
## Review And Clean Metadata
|
||||
|
||||
Purpose: inspect merge conflicts, apply corrections, and enrich incomplete records.
|
||||
|
|
@ -393,6 +399,12 @@ Write the topic slice to a file:
|
|||
.venv/bin/python -m citegeist --db library.sqlite3 export-topic artificial-life --output artificial-life-topic.bib
|
||||
```
|
||||
|
||||
Include DOI-only placeholder records in the topic export:
|
||||
|
||||
```bash
|
||||
.venv/bin/python -m citegeist --db library.sqlite3 export-topic artificial-life --include-stubs --output artificial-life-topic.bib
|
||||
```
|
||||
|
||||
### Bootstrap
|
||||
|
||||
Seed from a BibTeX file:
|
||||
|
|
|
|||
|
|
@ -43,6 +43,11 @@ def build_parser() -> argparse.ArgumentParser:
|
|||
export_parser = subparsers.add_parser("export", help="Export entries as BibTeX")
|
||||
export_parser.add_argument("citation_keys", nargs="*", help="Optional citation keys to export")
|
||||
export_parser.add_argument("--output", help="Write BibTeX to a file instead of stdout")
|
||||
export_parser.add_argument(
|
||||
"--include-stubs",
|
||||
action="store_true",
|
||||
help="Include DOI-only placeholder records in broad exports",
|
||||
)
|
||||
|
||||
status_parser = subparsers.add_parser("set-status", help="Set the review status for one entry")
|
||||
status_parser.add_argument("citation_key", help="Citation key to update")
|
||||
|
|
@ -494,6 +499,11 @@ def build_parser() -> argparse.ArgumentParser:
|
|||
)
|
||||
export_topic_parser.add_argument("topic_slug", help="Topic slug to export")
|
||||
export_topic_parser.add_argument("--output", help="Write BibTeX to a file instead of stdout")
|
||||
export_topic_parser.add_argument(
|
||||
"--include-stubs",
|
||||
action="store_true",
|
||||
help="Include DOI-only placeholder records in the topic export",
|
||||
)
|
||||
|
||||
return parser
|
||||
|
||||
|
|
@ -511,7 +521,7 @@ def main(argv: list[str] | None = None) -> int:
|
|||
if args.command == "show":
|
||||
return _run_show(store, args.citation_key, args.limit, args.provenance, args.conflicts)
|
||||
if args.command == "export":
|
||||
return _run_export(store, args.citation_keys, args.output)
|
||||
return _run_export(store, args.citation_keys, args.output, args.include_stubs)
|
||||
if args.command == "set-status":
|
||||
return _run_set_status(store, args.citation_key, args.review_status)
|
||||
if args.command == "resolve-conflicts":
|
||||
|
|
@ -660,7 +670,7 @@ def main(argv: list[str] | None = None) -> int:
|
|||
if args.command == "topic-entries":
|
||||
return _run_topic_entries(store, args.topic_slug, args.limit)
|
||||
if args.command == "export-topic":
|
||||
return _run_export_topic(store, args.topic_slug, args.output)
|
||||
return _run_export_topic(store, args.topic_slug, args.output, args.include_stubs)
|
||||
finally:
|
||||
store.close()
|
||||
|
||||
|
|
@ -715,8 +725,14 @@ def _run_show(
|
|||
return 0
|
||||
|
||||
|
||||
def _run_export(store: BibliographyStore, citation_keys: list[str], output: str | None) -> int:
|
||||
rendered = store.export_bibtex(citation_keys or None)
|
||||
def _run_export(
|
||||
store: BibliographyStore,
|
||||
citation_keys: list[str],
|
||||
output: str | None,
|
||||
include_stubs: bool,
|
||||
) -> int:
|
||||
explicit_keys = citation_keys or None
|
||||
rendered = store.export_bibtex(explicit_keys, include_stubs=include_stubs or explicit_keys is not None)
|
||||
if output:
|
||||
Path(output).write_text(rendered + ("\n" if rendered else ""), encoding="utf-8")
|
||||
else:
|
||||
|
|
@ -1731,13 +1747,13 @@ def _run_topic_entries(store: BibliographyStore, topic_slug: str, limit: int) ->
|
|||
return 0
|
||||
|
||||
|
||||
def _run_export_topic(store: BibliographyStore, topic_slug: str, output: str | None) -> int:
|
||||
def _run_export_topic(store: BibliographyStore, topic_slug: str, output: str | None, include_stubs: bool) -> int:
|
||||
topic = store.get_topic(topic_slug)
|
||||
if topic is None:
|
||||
print(f"Topic not found: {topic_slug}", file=sys.stderr)
|
||||
return 1
|
||||
citation_keys = [row["citation_key"] for row in store.list_topic_entries(topic_slug, limit=100000)]
|
||||
rendered = store.export_bibtex(citation_keys)
|
||||
rendered = store.export_bibtex(citation_keys, include_stubs=include_stubs)
|
||||
if output:
|
||||
Path(output).write_text(rendered + ("\n" if rendered else ""), encoding="utf-8")
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -1010,7 +1010,10 @@ class BibliographyStore:
|
|||
return None
|
||||
return render_bibtex([entry])
|
||||
|
||||
def export_bibtex(self, citation_keys: list[str] | None = None) -> str:
|
||||
def export_bibtex(self, citation_keys: list[str] | None = None, include_stubs: bool | None = None) -> str:
|
||||
explicit_keys = citation_keys is not None
|
||||
if include_stubs is None:
|
||||
include_stubs = explicit_keys
|
||||
if citation_keys is None:
|
||||
rows = self.connection.execute(
|
||||
"SELECT citation_key FROM entries ORDER BY COALESCE(year, ''), citation_key"
|
||||
|
|
@ -1022,6 +1025,8 @@ class BibliographyStore:
|
|||
for citation_key in citation_keys:
|
||||
entry = self._load_bib_entry(citation_key)
|
||||
if entry is not None:
|
||||
if not include_stubs and self._is_export_stub(entry):
|
||||
continue
|
||||
entries.append(entry)
|
||||
if not entries:
|
||||
return ""
|
||||
|
|
@ -1091,6 +1096,22 @@ class BibliographyStore:
|
|||
fields=dict(fields),
|
||||
)
|
||||
|
||||
def _is_export_stub(self, entry: BibEntry) -> bool:
|
||||
title = " ".join(entry.fields.get("title", "").split()).strip().lower()
|
||||
doi = " ".join(entry.fields.get("doi", "").split()).strip()
|
||||
url = " ".join(entry.fields.get("url", "").split()).strip()
|
||||
has_author = bool(" ".join(entry.fields.get("author", "").split()).strip())
|
||||
has_abstract = bool(" ".join(entry.fields.get("abstract", "").split()).strip())
|
||||
has_journal = bool(" ".join(entry.fields.get("journal", "").split()).strip())
|
||||
has_booktitle = bool(" ".join(entry.fields.get("booktitle", "").split()).strip())
|
||||
if not doi:
|
||||
return False
|
||||
if title and not (title.startswith("referenced work ") or title.startswith("untitled")):
|
||||
return False
|
||||
return not any((has_author, has_abstract, has_journal, has_booktitle)) and (
|
||||
not url or url.startswith("https://doi.org/")
|
||||
)
|
||||
|
||||
def _load_creator_names(self, citation_key: str, role: str) -> list[str]:
|
||||
rows = self.connection.execute(
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -74,6 +74,42 @@ def test_cli_ingest_show_search_and_export(tmp_path: Path):
|
|||
assert "@article{smith2024graphs," in exported
|
||||
|
||||
|
||||
def test_cli_export_skips_stub_entries_by_default_but_can_include_them(tmp_path: Path):
|
||||
bib_path = tmp_path / "input.bib"
|
||||
bib_path.write_text(
|
||||
"""
|
||||
@misc{stubdoi,
|
||||
title = {Referenced work 6},
|
||||
doi = {10.1200/JCO.2002.04.117},
|
||||
url = {https://doi.org/10.1200/JCO.2002.04.117}
|
||||
}
|
||||
|
||||
@article{realentry,
|
||||
author = {Smith, Jane},
|
||||
title = {Real Entry},
|
||||
year = {2024},
|
||||
doi = {10.1000/real}
|
||||
}
|
||||
""",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
assert run_cli(tmp_path, "ingest", str(bib_path)).returncode == 0
|
||||
|
||||
default_export = run_cli(tmp_path, "export")
|
||||
assert default_export.returncode == 0
|
||||
assert "@article{realentry," in default_export.stdout
|
||||
assert "@misc{stubdoi," not in default_export.stdout
|
||||
|
||||
explicit_export = run_cli(tmp_path, "export", "stubdoi")
|
||||
assert explicit_export.returncode == 0
|
||||
assert "@misc{stubdoi," in explicit_export.stdout
|
||||
|
||||
include_export = run_cli(tmp_path, "export", "--include-stubs")
|
||||
assert include_export.returncode == 0
|
||||
assert "@misc{stubdoi," in include_export.stdout
|
||||
|
||||
|
||||
def test_cli_provenance_and_status_updates(tmp_path: Path):
|
||||
bib_path = tmp_path / "input.bib"
|
||||
bib_path.write_text(SAMPLE_BIB, encoding="utf-8")
|
||||
|
|
@ -1140,6 +1176,52 @@ def test_cli_export_topic(tmp_path: Path):
|
|||
assert "@article{seed2024," in exported
|
||||
|
||||
|
||||
def test_cli_export_topic_skips_stub_entries_by_default(tmp_path: Path):
|
||||
bib_path = tmp_path / "input.bib"
|
||||
bib_path.write_text(
|
||||
"""
|
||||
@misc{stubdoi,
|
||||
title = {Referenced work 6},
|
||||
doi = {10.1200/JCO.2002.04.117},
|
||||
url = {https://doi.org/10.1200/JCO.2002.04.117}
|
||||
}
|
||||
|
||||
@article{seed2024,
|
||||
author = {Seed, Alice},
|
||||
title = {Seed Paper},
|
||||
year = {2024}
|
||||
}
|
||||
""",
|
||||
encoding="utf-8",
|
||||
)
|
||||
assert run_cli(tmp_path, "ingest", str(bib_path)).returncode == 0
|
||||
|
||||
from citegeist.storage import BibliographyStore
|
||||
|
||||
database = tmp_path / "library.sqlite3"
|
||||
store = BibliographyStore(database)
|
||||
try:
|
||||
for citation_key in ("stubdoi", "seed2024"):
|
||||
store.add_entry_topic(
|
||||
citation_key,
|
||||
topic_slug="graph-methods",
|
||||
topic_name="Graph Methods",
|
||||
source_label="topic-seed",
|
||||
)
|
||||
store.connection.commit()
|
||||
finally:
|
||||
store.close()
|
||||
|
||||
default_export = run_cli(tmp_path, "export-topic", "graph-methods")
|
||||
assert default_export.returncode == 0
|
||||
assert "@article{seed2024," in default_export.stdout
|
||||
assert "@misc{stubdoi," not in default_export.stdout
|
||||
|
||||
include_export = run_cli(tmp_path, "export-topic", "graph-methods", "--include-stubs")
|
||||
assert include_export.returncode == 0
|
||||
assert "@misc{stubdoi," in include_export.stdout
|
||||
|
||||
|
||||
def test_cli_search_can_filter_by_topic(tmp_path: Path):
|
||||
bib_path = tmp_path / "input.bib"
|
||||
bib_path.write_text(
|
||||
|
|
|
|||
|
|
@ -69,6 +69,39 @@ def test_store_exports_bibtex_from_normalized_rows():
|
|||
store.close()
|
||||
|
||||
|
||||
def test_store_export_skips_doi_only_stub_by_default():
|
||||
store = BibliographyStore()
|
||||
try:
|
||||
store.ingest_bibtex(
|
||||
"""
|
||||
@misc{stubdoi,
|
||||
title = {Referenced work 6},
|
||||
doi = {10.1200/JCO.2002.04.117},
|
||||
url = {https://doi.org/10.1200/JCO.2002.04.117}
|
||||
}
|
||||
|
||||
@article{realentry,
|
||||
author = {Smith, Jane},
|
||||
title = {Real Entry},
|
||||
year = {2024},
|
||||
doi = {10.1000/real}
|
||||
}
|
||||
"""
|
||||
)
|
||||
|
||||
exported = store.export_bibtex()
|
||||
assert "@article{realentry," in exported
|
||||
assert "@misc{stubdoi," not in exported
|
||||
|
||||
explicit = store.export_bibtex(["stubdoi"])
|
||||
assert "@misc{stubdoi," in explicit
|
||||
|
||||
with_stubs = store.export_bibtex(include_stubs=True)
|
||||
assert "@misc{stubdoi," in with_stubs
|
||||
finally:
|
||||
store.close()
|
||||
|
||||
|
||||
def test_store_records_provenance_and_review_status():
|
||||
store = BibliographyStore()
|
||||
try:
|
||||
|
|
|
|||
Loading…
Reference in New Issue