Skip stub entries in default exports
This commit is contained in:
parent
912dc59301
commit
0144bd9ef4
|
|
@ -154,6 +154,8 @@ PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 export --outpu
|
||||||
|
|
||||||
For a fuller option-by-option CLI cookbook, see [examples/cli/README.md](./examples/cli/README.md).
|
For a fuller option-by-option CLI cookbook, see [examples/cli/README.md](./examples/cli/README.md).
|
||||||
|
|
||||||
|
Broad BibTeX exports skip DOI-only placeholder records such as `Referenced work N` by default. Use `--include-stubs` on `export` or `export-topic` if you want those entries included anyway. Entries named explicitly by citation key on `export` are always written, even when they are stubs.
|
||||||
|
|
||||||
For live-source development, prefer fixture-backed or cache-backed source clients so resolver and expansion work can be exercised repeatedly without re-hitting upstream APIs on every run.
|
For live-source development, prefer fixture-backed or cache-backed source clients so resolver and expansion work can be exercised repeatedly without re-hitting upstream APIs on every run.
|
||||||
|
|
||||||
## Example Application
|
## Example Application
|
||||||
|
|
|
||||||
|
|
@ -129,6 +129,12 @@ Write BibTeX to a file:
|
||||||
.venv/bin/python -m citegeist --db library.sqlite3 export --output artificial-life.bib
|
.venv/bin/python -m citegeist --db library.sqlite3 export --output artificial-life.bib
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Include DOI-only placeholder records in a broad export:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
.venv/bin/python -m citegeist --db library.sqlite3 export --include-stubs --output artificial-life.bib
|
||||||
|
```
|
||||||
|
|
||||||
## Review And Clean Metadata
|
## Review And Clean Metadata
|
||||||
|
|
||||||
Purpose: inspect merge conflicts, apply corrections, and enrich incomplete records.
|
Purpose: inspect merge conflicts, apply corrections, and enrich incomplete records.
|
||||||
|
|
@ -393,6 +399,12 @@ Write the topic slice to a file:
|
||||||
.venv/bin/python -m citegeist --db library.sqlite3 export-topic artificial-life --output artificial-life-topic.bib
|
.venv/bin/python -m citegeist --db library.sqlite3 export-topic artificial-life --output artificial-life-topic.bib
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Include DOI-only placeholder records in the topic export:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
.venv/bin/python -m citegeist --db library.sqlite3 export-topic artificial-life --include-stubs --output artificial-life-topic.bib
|
||||||
|
```
|
||||||
|
|
||||||
### Bootstrap
|
### Bootstrap
|
||||||
|
|
||||||
Seed from a BibTeX file:
|
Seed from a BibTeX file:
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,11 @@ def build_parser() -> argparse.ArgumentParser:
|
||||||
export_parser = subparsers.add_parser("export", help="Export entries as BibTeX")
|
export_parser = subparsers.add_parser("export", help="Export entries as BibTeX")
|
||||||
export_parser.add_argument("citation_keys", nargs="*", help="Optional citation keys to export")
|
export_parser.add_argument("citation_keys", nargs="*", help="Optional citation keys to export")
|
||||||
export_parser.add_argument("--output", help="Write BibTeX to a file instead of stdout")
|
export_parser.add_argument("--output", help="Write BibTeX to a file instead of stdout")
|
||||||
|
export_parser.add_argument(
|
||||||
|
"--include-stubs",
|
||||||
|
action="store_true",
|
||||||
|
help="Include DOI-only placeholder records in broad exports",
|
||||||
|
)
|
||||||
|
|
||||||
status_parser = subparsers.add_parser("set-status", help="Set the review status for one entry")
|
status_parser = subparsers.add_parser("set-status", help="Set the review status for one entry")
|
||||||
status_parser.add_argument("citation_key", help="Citation key to update")
|
status_parser.add_argument("citation_key", help="Citation key to update")
|
||||||
|
|
@ -494,6 +499,11 @@ def build_parser() -> argparse.ArgumentParser:
|
||||||
)
|
)
|
||||||
export_topic_parser.add_argument("topic_slug", help="Topic slug to export")
|
export_topic_parser.add_argument("topic_slug", help="Topic slug to export")
|
||||||
export_topic_parser.add_argument("--output", help="Write BibTeX to a file instead of stdout")
|
export_topic_parser.add_argument("--output", help="Write BibTeX to a file instead of stdout")
|
||||||
|
export_topic_parser.add_argument(
|
||||||
|
"--include-stubs",
|
||||||
|
action="store_true",
|
||||||
|
help="Include DOI-only placeholder records in the topic export",
|
||||||
|
)
|
||||||
|
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
@ -511,7 +521,7 @@ def main(argv: list[str] | None = None) -> int:
|
||||||
if args.command == "show":
|
if args.command == "show":
|
||||||
return _run_show(store, args.citation_key, args.limit, args.provenance, args.conflicts)
|
return _run_show(store, args.citation_key, args.limit, args.provenance, args.conflicts)
|
||||||
if args.command == "export":
|
if args.command == "export":
|
||||||
return _run_export(store, args.citation_keys, args.output)
|
return _run_export(store, args.citation_keys, args.output, args.include_stubs)
|
||||||
if args.command == "set-status":
|
if args.command == "set-status":
|
||||||
return _run_set_status(store, args.citation_key, args.review_status)
|
return _run_set_status(store, args.citation_key, args.review_status)
|
||||||
if args.command == "resolve-conflicts":
|
if args.command == "resolve-conflicts":
|
||||||
|
|
@ -660,7 +670,7 @@ def main(argv: list[str] | None = None) -> int:
|
||||||
if args.command == "topic-entries":
|
if args.command == "topic-entries":
|
||||||
return _run_topic_entries(store, args.topic_slug, args.limit)
|
return _run_topic_entries(store, args.topic_slug, args.limit)
|
||||||
if args.command == "export-topic":
|
if args.command == "export-topic":
|
||||||
return _run_export_topic(store, args.topic_slug, args.output)
|
return _run_export_topic(store, args.topic_slug, args.output, args.include_stubs)
|
||||||
finally:
|
finally:
|
||||||
store.close()
|
store.close()
|
||||||
|
|
||||||
|
|
@ -715,8 +725,14 @@ def _run_show(
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def _run_export(store: BibliographyStore, citation_keys: list[str], output: str | None) -> int:
|
def _run_export(
|
||||||
rendered = store.export_bibtex(citation_keys or None)
|
store: BibliographyStore,
|
||||||
|
citation_keys: list[str],
|
||||||
|
output: str | None,
|
||||||
|
include_stubs: bool,
|
||||||
|
) -> int:
|
||||||
|
explicit_keys = citation_keys or None
|
||||||
|
rendered = store.export_bibtex(explicit_keys, include_stubs=include_stubs or explicit_keys is not None)
|
||||||
if output:
|
if output:
|
||||||
Path(output).write_text(rendered + ("\n" if rendered else ""), encoding="utf-8")
|
Path(output).write_text(rendered + ("\n" if rendered else ""), encoding="utf-8")
|
||||||
else:
|
else:
|
||||||
|
|
@ -1731,13 +1747,13 @@ def _run_topic_entries(store: BibliographyStore, topic_slug: str, limit: int) ->
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def _run_export_topic(store: BibliographyStore, topic_slug: str, output: str | None) -> int:
|
def _run_export_topic(store: BibliographyStore, topic_slug: str, output: str | None, include_stubs: bool) -> int:
|
||||||
topic = store.get_topic(topic_slug)
|
topic = store.get_topic(topic_slug)
|
||||||
if topic is None:
|
if topic is None:
|
||||||
print(f"Topic not found: {topic_slug}", file=sys.stderr)
|
print(f"Topic not found: {topic_slug}", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
citation_keys = [row["citation_key"] for row in store.list_topic_entries(topic_slug, limit=100000)]
|
citation_keys = [row["citation_key"] for row in store.list_topic_entries(topic_slug, limit=100000)]
|
||||||
rendered = store.export_bibtex(citation_keys)
|
rendered = store.export_bibtex(citation_keys, include_stubs=include_stubs)
|
||||||
if output:
|
if output:
|
||||||
Path(output).write_text(rendered + ("\n" if rendered else ""), encoding="utf-8")
|
Path(output).write_text(rendered + ("\n" if rendered else ""), encoding="utf-8")
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
|
|
@ -1010,7 +1010,10 @@ class BibliographyStore:
|
||||||
return None
|
return None
|
||||||
return render_bibtex([entry])
|
return render_bibtex([entry])
|
||||||
|
|
||||||
def export_bibtex(self, citation_keys: list[str] | None = None) -> str:
|
def export_bibtex(self, citation_keys: list[str] | None = None, include_stubs: bool | None = None) -> str:
|
||||||
|
explicit_keys = citation_keys is not None
|
||||||
|
if include_stubs is None:
|
||||||
|
include_stubs = explicit_keys
|
||||||
if citation_keys is None:
|
if citation_keys is None:
|
||||||
rows = self.connection.execute(
|
rows = self.connection.execute(
|
||||||
"SELECT citation_key FROM entries ORDER BY COALESCE(year, ''), citation_key"
|
"SELECT citation_key FROM entries ORDER BY COALESCE(year, ''), citation_key"
|
||||||
|
|
@ -1022,6 +1025,8 @@ class BibliographyStore:
|
||||||
for citation_key in citation_keys:
|
for citation_key in citation_keys:
|
||||||
entry = self._load_bib_entry(citation_key)
|
entry = self._load_bib_entry(citation_key)
|
||||||
if entry is not None:
|
if entry is not None:
|
||||||
|
if not include_stubs and self._is_export_stub(entry):
|
||||||
|
continue
|
||||||
entries.append(entry)
|
entries.append(entry)
|
||||||
if not entries:
|
if not entries:
|
||||||
return ""
|
return ""
|
||||||
|
|
@ -1091,6 +1096,22 @@ class BibliographyStore:
|
||||||
fields=dict(fields),
|
fields=dict(fields),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _is_export_stub(self, entry: BibEntry) -> bool:
|
||||||
|
title = " ".join(entry.fields.get("title", "").split()).strip().lower()
|
||||||
|
doi = " ".join(entry.fields.get("doi", "").split()).strip()
|
||||||
|
url = " ".join(entry.fields.get("url", "").split()).strip()
|
||||||
|
has_author = bool(" ".join(entry.fields.get("author", "").split()).strip())
|
||||||
|
has_abstract = bool(" ".join(entry.fields.get("abstract", "").split()).strip())
|
||||||
|
has_journal = bool(" ".join(entry.fields.get("journal", "").split()).strip())
|
||||||
|
has_booktitle = bool(" ".join(entry.fields.get("booktitle", "").split()).strip())
|
||||||
|
if not doi:
|
||||||
|
return False
|
||||||
|
if title and not (title.startswith("referenced work ") or title.startswith("untitled")):
|
||||||
|
return False
|
||||||
|
return not any((has_author, has_abstract, has_journal, has_booktitle)) and (
|
||||||
|
not url or url.startswith("https://doi.org/")
|
||||||
|
)
|
||||||
|
|
||||||
def _load_creator_names(self, citation_key: str, role: str) -> list[str]:
|
def _load_creator_names(self, citation_key: str, role: str) -> list[str]:
|
||||||
rows = self.connection.execute(
|
rows = self.connection.execute(
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -74,6 +74,42 @@ def test_cli_ingest_show_search_and_export(tmp_path: Path):
|
||||||
assert "@article{smith2024graphs," in exported
|
assert "@article{smith2024graphs," in exported
|
||||||
|
|
||||||
|
|
||||||
|
def test_cli_export_skips_stub_entries_by_default_but_can_include_them(tmp_path: Path):
|
||||||
|
bib_path = tmp_path / "input.bib"
|
||||||
|
bib_path.write_text(
|
||||||
|
"""
|
||||||
|
@misc{stubdoi,
|
||||||
|
title = {Referenced work 6},
|
||||||
|
doi = {10.1200/JCO.2002.04.117},
|
||||||
|
url = {https://doi.org/10.1200/JCO.2002.04.117}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{realentry,
|
||||||
|
author = {Smith, Jane},
|
||||||
|
title = {Real Entry},
|
||||||
|
year = {2024},
|
||||||
|
doi = {10.1000/real}
|
||||||
|
}
|
||||||
|
""",
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
|
||||||
|
assert run_cli(tmp_path, "ingest", str(bib_path)).returncode == 0
|
||||||
|
|
||||||
|
default_export = run_cli(tmp_path, "export")
|
||||||
|
assert default_export.returncode == 0
|
||||||
|
assert "@article{realentry," in default_export.stdout
|
||||||
|
assert "@misc{stubdoi," not in default_export.stdout
|
||||||
|
|
||||||
|
explicit_export = run_cli(tmp_path, "export", "stubdoi")
|
||||||
|
assert explicit_export.returncode == 0
|
||||||
|
assert "@misc{stubdoi," in explicit_export.stdout
|
||||||
|
|
||||||
|
include_export = run_cli(tmp_path, "export", "--include-stubs")
|
||||||
|
assert include_export.returncode == 0
|
||||||
|
assert "@misc{stubdoi," in include_export.stdout
|
||||||
|
|
||||||
|
|
||||||
def test_cli_provenance_and_status_updates(tmp_path: Path):
|
def test_cli_provenance_and_status_updates(tmp_path: Path):
|
||||||
bib_path = tmp_path / "input.bib"
|
bib_path = tmp_path / "input.bib"
|
||||||
bib_path.write_text(SAMPLE_BIB, encoding="utf-8")
|
bib_path.write_text(SAMPLE_BIB, encoding="utf-8")
|
||||||
|
|
@ -1140,6 +1176,52 @@ def test_cli_export_topic(tmp_path: Path):
|
||||||
assert "@article{seed2024," in exported
|
assert "@article{seed2024," in exported
|
||||||
|
|
||||||
|
|
||||||
|
def test_cli_export_topic_skips_stub_entries_by_default(tmp_path: Path):
|
||||||
|
bib_path = tmp_path / "input.bib"
|
||||||
|
bib_path.write_text(
|
||||||
|
"""
|
||||||
|
@misc{stubdoi,
|
||||||
|
title = {Referenced work 6},
|
||||||
|
doi = {10.1200/JCO.2002.04.117},
|
||||||
|
url = {https://doi.org/10.1200/JCO.2002.04.117}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{seed2024,
|
||||||
|
author = {Seed, Alice},
|
||||||
|
title = {Seed Paper},
|
||||||
|
year = {2024}
|
||||||
|
}
|
||||||
|
""",
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
assert run_cli(tmp_path, "ingest", str(bib_path)).returncode == 0
|
||||||
|
|
||||||
|
from citegeist.storage import BibliographyStore
|
||||||
|
|
||||||
|
database = tmp_path / "library.sqlite3"
|
||||||
|
store = BibliographyStore(database)
|
||||||
|
try:
|
||||||
|
for citation_key in ("stubdoi", "seed2024"):
|
||||||
|
store.add_entry_topic(
|
||||||
|
citation_key,
|
||||||
|
topic_slug="graph-methods",
|
||||||
|
topic_name="Graph Methods",
|
||||||
|
source_label="topic-seed",
|
||||||
|
)
|
||||||
|
store.connection.commit()
|
||||||
|
finally:
|
||||||
|
store.close()
|
||||||
|
|
||||||
|
default_export = run_cli(tmp_path, "export-topic", "graph-methods")
|
||||||
|
assert default_export.returncode == 0
|
||||||
|
assert "@article{seed2024," in default_export.stdout
|
||||||
|
assert "@misc{stubdoi," not in default_export.stdout
|
||||||
|
|
||||||
|
include_export = run_cli(tmp_path, "export-topic", "graph-methods", "--include-stubs")
|
||||||
|
assert include_export.returncode == 0
|
||||||
|
assert "@misc{stubdoi," in include_export.stdout
|
||||||
|
|
||||||
|
|
||||||
def test_cli_search_can_filter_by_topic(tmp_path: Path):
|
def test_cli_search_can_filter_by_topic(tmp_path: Path):
|
||||||
bib_path = tmp_path / "input.bib"
|
bib_path = tmp_path / "input.bib"
|
||||||
bib_path.write_text(
|
bib_path.write_text(
|
||||||
|
|
|
||||||
|
|
@ -69,6 +69,39 @@ def test_store_exports_bibtex_from_normalized_rows():
|
||||||
store.close()
|
store.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_store_export_skips_doi_only_stub_by_default():
|
||||||
|
store = BibliographyStore()
|
||||||
|
try:
|
||||||
|
store.ingest_bibtex(
|
||||||
|
"""
|
||||||
|
@misc{stubdoi,
|
||||||
|
title = {Referenced work 6},
|
||||||
|
doi = {10.1200/JCO.2002.04.117},
|
||||||
|
url = {https://doi.org/10.1200/JCO.2002.04.117}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{realentry,
|
||||||
|
author = {Smith, Jane},
|
||||||
|
title = {Real Entry},
|
||||||
|
year = {2024},
|
||||||
|
doi = {10.1000/real}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
exported = store.export_bibtex()
|
||||||
|
assert "@article{realentry," in exported
|
||||||
|
assert "@misc{stubdoi," not in exported
|
||||||
|
|
||||||
|
explicit = store.export_bibtex(["stubdoi"])
|
||||||
|
assert "@misc{stubdoi," in explicit
|
||||||
|
|
||||||
|
with_stubs = store.export_bibtex(include_stubs=True)
|
||||||
|
assert "@misc{stubdoi," in with_stubs
|
||||||
|
finally:
|
||||||
|
store.close()
|
||||||
|
|
||||||
|
|
||||||
def test_store_records_provenance_and_review_status():
|
def test_store_records_provenance_and_review_status():
|
||||||
store = BibliographyStore()
|
store = BibliographyStore()
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue