Extend JabRef sync workflow with review annotations
This commit is contained in:
parent
b26f662af9
commit
65fde034e1
17
README.md
17
README.md
|
|
@ -176,6 +176,7 @@ PYTHONPATH=src .venv/bin/python -m citegeist verify --bib draft.bib --output ver
|
|||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve smith2024graphs
|
||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --preview --limit 25
|
||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --all-misc --limit 25
|
||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 sync-jabref my-library.bib --output my-library.enriched.bib
|
||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 topics
|
||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 topic-entries abiogenesis
|
||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 export-topic abiogenesis --output abiogenesis.bib
|
||||
|
|
@ -208,6 +209,22 @@ OpenAlex expansion is also conservative about noisy secondary records. Discoveri
|
|||
|
||||
For live-source development, prefer fixture-backed or cache-backed source clients so resolver and expansion work can be exercised repeatedly without re-hitting upstream APIs on every run.
|
||||
|
||||
## JabRef Workflow
|
||||
|
||||
`citegeist` is not a replacement for JabRef's day-to-day BibTeX editing UX. The intended near-term integration model is file-based round-tripping:
|
||||
|
||||
- use JabRef to inspect, edit, and review your main `.bib` library;
|
||||
- use `citegeist sync-jabref` to ingest that file into CiteGeist, run metadata enrichment against imported entries, and write an enriched `.bib` export back out;
|
||||
- reopen the enriched file in JabRef for human review and curation.
|
||||
|
||||
That keeps JabRef as the primary manual review surface while letting CiteGeist handle source-backed resolution and discovery work that reference managers usually do not automate well.
|
||||
|
||||
Useful options for that round trip:
|
||||
|
||||
- `--in-place`: overwrite the input `.bib` file instead of writing to a separate export path
|
||||
- `--annotate-review`: add `x_citegeist_*` sidecar fields such as review status, open-conflict count, and last source label so JabRef can surface CiteGeist review cues directly in the BibTeX record
|
||||
- `--no-resolve`: skip live metadata resolution and only perform import plus re-export
|
||||
|
||||
## Adopted Ideas From Earlier Repos
|
||||
|
||||
`citegeist` now absorbs two useful patterns from adjacent bibliography tools while keeping them inside the main Python 3 package boundary:
|
||||
|
|
|
|||
|
|
@ -315,6 +315,26 @@ Limit discoveries per seed:
|
|||
.venv/bin/python -m citegeist --db library.sqlite3 expand langton1989artificial1 --source openalex --limit 10
|
||||
```
|
||||
|
||||
### JabRef Round Trip
|
||||
|
||||
Use JabRef as the main editor and CiteGeist as the enrichment pass:
|
||||
|
||||
```bash
|
||||
.venv/bin/python -m citegeist --db library.sqlite3 sync-jabref my-library.bib --output my-library.enriched.bib
|
||||
```
|
||||
|
||||
Skip resolver calls if you only want a normalized import/export pass:
|
||||
|
||||
```bash
|
||||
.venv/bin/python -m citegeist --db library.sqlite3 sync-jabref my-library.bib --output my-library.enriched.bib --no-resolve
|
||||
```
|
||||
|
||||
Write back to the same file and include CiteGeist review cues as BibTeX sidecar fields for JabRef:
|
||||
|
||||
```bash
|
||||
.venv/bin/python -m citegeist --db library.sqlite3 sync-jabref my-library.bib --in-place --annotate-review
|
||||
```
|
||||
|
||||
## Build A Topic-Centered Bibliography
|
||||
|
||||
Purpose: create, expand, inspect, and export a topic slice such as `artificial life`.
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import sys
|
|||
from pathlib import Path
|
||||
|
||||
from .batch import BatchBootstrapRunner, load_batch_jobs
|
||||
from .bibtex import parse_bibtex, render_bibtex
|
||||
from .bibtex import BibEntry, parse_bibtex, render_bibtex
|
||||
from .bootstrap import Bootstrapper
|
||||
from .examples.talkorigins import TalkOriginsScraper
|
||||
from .expand import CrossrefExpander, OpenAlexExpander, TopicExpander, _expand_relation_types
|
||||
|
|
@ -56,6 +56,30 @@ def build_parser() -> argparse.ArgumentParser:
|
|||
help="Include DOI-only placeholder records in broad exports",
|
||||
)
|
||||
|
||||
sync_jabref_parser = subparsers.add_parser(
|
||||
"sync-jabref",
|
||||
help="Round-trip a JabRef-managed BibTeX file through CiteGeist ingest, enrichment, and export",
|
||||
)
|
||||
sync_jabref_parser.add_argument("input", help="BibTeX file managed in JabRef")
|
||||
sync_jabref_parser.add_argument("--output", help="Path to write the enriched BibTeX export")
|
||||
sync_jabref_parser.add_argument(
|
||||
"--in-place",
|
||||
action="store_true",
|
||||
help="Write the enriched BibTeX back to the input file instead of a separate output path",
|
||||
)
|
||||
sync_jabref_parser.add_argument("--status", default="draft", help="Initial review status for newly ingested entries")
|
||||
sync_jabref_parser.add_argument("--source-label", help="Provenance label for the ingest step")
|
||||
sync_jabref_parser.add_argument(
|
||||
"--no-resolve",
|
||||
action="store_true",
|
||||
help="Skip metadata resolution after ingest and only re-export the imported entries",
|
||||
)
|
||||
sync_jabref_parser.add_argument(
|
||||
"--annotate-review",
|
||||
action="store_true",
|
||||
help="Add CiteGeist review/status sidecar fields to the exported BibTeX for easier JabRef review",
|
||||
)
|
||||
|
||||
status_parser = subparsers.add_parser("set-status", help="Set the review status for one entry")
|
||||
status_parser.add_argument("citation_key", help="Citation key to update")
|
||||
status_parser.add_argument("review_status", help="New review status")
|
||||
|
|
@ -662,6 +686,17 @@ def main(argv: list[str] | None = None) -> int:
|
|||
return _run_show(store, args.citation_key, args.limit, args.provenance, args.conflicts)
|
||||
if args.command == "export":
|
||||
return _run_export(store, args.citation_keys, args.output, args.include_stubs)
|
||||
if args.command == "sync-jabref":
|
||||
return _run_sync_jabref(
|
||||
store,
|
||||
Path(args.input),
|
||||
Path(args.output) if args.output else None,
|
||||
args.in_place,
|
||||
args.status,
|
||||
args.source_label,
|
||||
args.no_resolve,
|
||||
args.annotate_review,
|
||||
)
|
||||
if args.command == "set-status":
|
||||
return _run_set_status(store, args.citation_key, args.review_status)
|
||||
if args.command == "resolve-conflicts":
|
||||
|
|
@ -907,6 +942,103 @@ def _run_export(
|
|||
return 0
|
||||
|
||||
|
||||
def _run_sync_jabref(
    store: BibliographyStore,
    input_path: Path,
    output_path: Path | None,
    in_place: bool,
    review_status: str,
    source_label: str | None,
    skip_resolve: bool,
    annotate_review: bool,
) -> int:
    """Round-trip a BibTeX file through ingest, optional resolution, and export.

    Args:
        store: Store that receives the ingested entries and is read back for export.
        input_path: BibTeX file to ingest.
        output_path: Destination for the export; ignored when ``in_place`` is set.
        in_place: Write the export back to ``input_path``.
        review_status: Review status passed to ``store.ingest_bibtex``.
        source_label: Provenance label for the ingest; defaults to ``str(input_path)``.
        skip_resolve: Skip the per-entry metadata resolution pass.
        annotate_review: Add review sidecar fields to every exported entry.

    Returns:
        ``0`` on success. ``1`` when no destination was given, the input could
        not be read, an in-place write would have emptied a non-empty input
        file, or at least one entry failed to resolve (the export is still
        written in that last case).
    """
    if in_place:
        effective_output_path = input_path
    elif output_path is not None:
        effective_output_path = output_path
    else:
        print("sync-jabref requires --output or --in-place", file=sys.stderr)
        return 1

    # Report unreadable input the same way as other CLI failures instead of
    # letting a traceback escape.
    try:
        text = input_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        print(f"sync-jabref could not read {input_path}: {exc}", file=sys.stderr)
        return 1

    imported_keys = store.ingest_bibtex(
        text,
        source_label=source_label or str(input_path),
        review_status=review_status,
    )

    resolved_keys: list[str] = []
    failed_keys: list[str] = []
    if not skip_resolve:
        resolver = MetadataResolver()
        total = len(imported_keys)
        for index, citation_key in enumerate(imported_keys, start=1):
            _print_progress("sync-jabref resolving", index, total, citation_key)
            if _resolve_one(store, resolver, citation_key):
                resolved_keys.append(citation_key)
            else:
                failed_keys.append(citation_key)

    rendered = _render_jabref_sync_export(store, imported_keys, annotate_review=annotate_review)

    # NOTE(review): the export is rebuilt from the imported keys only, so
    # anything in the input that ingest did not return as a key is not written
    # back — confirm how JabRef @comment metadata should be preserved.
    if in_place and not rendered and text.strip():
        # Never replace the user's library with an empty file.
        print(
            f"sync-jabref produced no entries; refusing to overwrite {input_path}",
            file=sys.stderr,
        )
        return 1

    effective_output_path.write_text(rendered + ("\n" if rendered else ""), encoding="utf-8")
    print(
        json.dumps(
            {
                "input": str(input_path),
                "output": str(effective_output_path),
                "imported_count": len(imported_keys),
                "resolved_count": len(resolved_keys),
                "failed_resolve_count": len(failed_keys),
                "skipped_resolution": skip_resolve,
                "annotated_review": annotate_review,
                "in_place": in_place,
                "citation_keys": imported_keys,
            },
            indent=2,
        )
    )
    # failed_keys can only be populated when resolution ran.
    return 1 if failed_keys else 0
|
||||
|
||||
|
||||
def _render_jabref_sync_export(
    store: BibliographyStore,
    citation_keys: list[str],
    *,
    annotate_review: bool,
) -> str:
    """Render the given citation keys as one BibTeX document.

    Keys for which ``store.get_bib_entry`` returns ``None`` are left out.
    When ``annotate_review`` is true each entry is passed through
    ``_annotated_jabref_entry`` first. Returns ``""`` when nothing is left to
    render.
    """

    def export_entry(key: str) -> BibEntry | None:
        # Load and (optionally) annotate one key at a time so store calls stay
        # interleaved per entry.
        loaded = store.get_bib_entry(key)
        if loaded is None or not annotate_review:
            return loaded
        return _annotated_jabref_entry(store, loaded)

    collected = [item for item in map(export_entry, citation_keys) if item is not None]
    if not collected:
        return ""
    return render_bibtex(collected)
|
||||
|
||||
|
||||
def _annotated_jabref_entry(store: BibliographyStore, entry: BibEntry) -> BibEntry:
    """Return a copy of ``entry`` with freshly computed ``x_citegeist_*`` fields.

    The sidecar fields are derived from the store on every call:

    - ``x_citegeist_review_status``: the row's ``review_status``, when non-empty.
    - ``x_citegeist_open_conflicts``: number of conflicts returned for ``status="open"``.
    - ``x_citegeist_conflict_fields``: sorted, comma-separated names of those conflicts' fields.
    - ``x_citegeist_last_source``: ``source_label`` of the final provenance record returned.

    A field is only written when it has a non-empty value. ``entry`` itself is
    not modified.
    """
    # Drop sidecar fields carried in from an earlier annotated export; left in
    # place, a value that no longer applies (such as a conflict count after
    # every conflict was closed) would be written out again.
    annotated = BibEntry(
        entry_type=entry.entry_type,
        citation_key=entry.citation_key,
        fields={
            name: value
            for name, value in entry.fields.items()
            if not str(name).lower().startswith("x_citegeist_")
        },
    )

    row = store.get_entry(entry.citation_key) or {}
    review_status = str(row.get("review_status") or "")
    if review_status:
        annotated.fields["x_citegeist_review_status"] = review_status

    open_conflicts = store.get_field_conflicts(entry.citation_key, status="open")
    if open_conflicts:
        annotated.fields["x_citegeist_open_conflicts"] = str(len(open_conflicts))
        conflict_fields = sorted(
            {str(conflict.get("field_name")) for conflict in open_conflicts if conflict.get("field_name")}
        )
        if conflict_fields:
            annotated.fields["x_citegeist_conflict_fields"] = ", ".join(conflict_fields)

    provenance = store.get_field_provenance(entry.citation_key)
    if provenance:
        # NOTE(review): assumes the store returns provenance oldest-first, so
        # the final record is the most recent source — confirm.
        last_source = str(provenance[-1].get("source_label") or "")
        if last_source:
            annotated.fields["x_citegeist_last_source"] = last_source
    return annotated
|
||||
|
||||
|
||||
def _run_set_status(store: BibliographyStore, citation_key: str, review_status: str) -> int:
|
||||
if not store.set_entry_status(citation_key, review_status):
|
||||
print(f"Entry not found: {citation_key}", file=sys.stderr)
|
||||
|
|
|
|||
|
|
@ -1048,6 +1048,9 @@ class BibliographyStore:
|
|||
return None
|
||||
return render_bibtex([entry])
|
||||
|
||||
def get_bib_entry(self, citation_key: str) -> BibEntry | None:
    """Return the stored entry for ``citation_key`` as a ``BibEntry``.

    Public wrapper around ``_load_bib_entry``; its result is passed through
    unchanged.
    """
    loaded = self._load_bib_entry(citation_key)
    return loaded
|
||||
|
||||
def export_bibtex(self, citation_keys: list[str] | None = None, include_stubs: bool | None = None) -> str:
    """Return only the ``"bibtex"`` text from ``export_bibtex_report``.

    Both arguments are forwarded unchanged to ``export_bibtex_report``.
    """
    report = self.export_bibtex_report(citation_keys, include_stubs=include_stubs)
    return report["bibtex"]
|
||||
|
||||
|
|
|
|||
|
|
@ -247,6 +247,203 @@ def test_cli_verify_bib_outputs_json(tmp_path: Path):
|
|||
assert payload[0]["entry"]["citation_key"] == "candidate2024"
|
||||
|
||||
|
||||
def test_cli_sync_jabref_ingests_resolves_and_exports(tmp_path: Path):
    """sync-jabref ingests the file, resolves each imported key, writes the export, and reports counts."""
    library_file = tmp_path / "jabref-library.bib"
    library_file.write_text(
        """
@article{smith2024graphs,
author = {Smith, Jane},
title = {Graph-first bibliography augmentation},
year = {2024}
}
""",
        encoding="utf-8",
    )
    enriched_file = tmp_path / "jabref-library.enriched.bib"

    class FakeStore:
        """Stand-in store that records ingest calls and serves one fixed entry."""

        def __init__(self) -> None:
            self.ingest_calls: list[tuple[str, str, str]] = []
            self.closed = False

        def ingest_bibtex(self, text: str, source_label: str, review_status: str) -> list[str]:
            self.ingest_calls.append((text, source_label, review_status))
            return ["smith2024graphs"]

        def get_bib_entry(self, citation_key: str):
            from citegeist.bibtex import BibEntry

            return BibEntry("article", citation_key, {"title": "Resolved Work"})

        def get_entry(self, citation_key: str):
            return {"citation_key": citation_key, "review_status": "enriched"}

        def get_field_conflicts(self, citation_key: str, status: str | None = None):
            return []

        def get_field_provenance(self, citation_key: str):
            return []

        def close(self) -> None:
            self.closed = True

    fake_store = FakeStore()
    resolve_calls: list[str] = []

    def record_resolve(store, resolver, citation_key):
        # Remember which keys were sent to the resolver and report success.
        resolve_calls.append(citation_key)
        return True

    argv = [
        "--db",
        str(tmp_path / "library.sqlite3"),
        "sync-jabref",
        str(library_file),
        "--output",
        str(enriched_file),
        "--status",
        "draft",
        "--source-label",
        "jabref:test",
    ]

    captured = io.StringIO()
    with (
        patch("citegeist.cli.BibliographyStore", return_value=fake_store),
        patch("citegeist.cli.MetadataResolver"),
        patch("citegeist.cli.render_bibtex", return_value="@article{smith2024graphs,\n title = {Resolved Work}\n}"),
        patch("citegeist.cli._resolve_one", side_effect=record_resolve),
        redirect_stdout(captured),
    ):
        exit_code = main(argv)

    assert exit_code == 0
    assert fake_store.ingest_calls[0][1:] == ("jabref:test", "draft")
    assert resolve_calls == ["smith2024graphs"]
    assert "@article{smith2024graphs," in enriched_file.read_text(encoding="utf-8")

    summary = json.loads(captured.getvalue())
    assert summary["imported_count"] == 1
    assert summary["resolved_count"] == 1
    assert summary["failed_resolve_count"] == 0
    assert summary["skipped_resolution"] is False
|
||||
|
||||
|
||||
def test_cli_sync_jabref_can_skip_resolution(tmp_path: Path):
    """With --no-resolve the resolver is never called and the summary reports the skip."""
    library_file = tmp_path / "jabref-library.bib"
    library_file.write_text("@article{seed2024, title = {Seed}}\n", encoding="utf-8")
    enriched_file = tmp_path / "jabref-library.enriched.bib"

    class FakeStore:
        """Stand-in store that serves a single fixed entry."""

        def ingest_bibtex(self, text: str, source_label: str, review_status: str) -> list[str]:
            return ["seed2024"]

        def get_bib_entry(self, citation_key: str):
            from citegeist.bibtex import BibEntry

            return BibEntry("article", citation_key, {"title": "Seed"})

        def get_entry(self, citation_key: str):
            return {"citation_key": citation_key, "review_status": "draft"}

        def get_field_conflicts(self, citation_key: str, status: str | None = None):
            return []

        def get_field_provenance(self, citation_key: str):
            return []

        def close(self) -> None:
            return None

    argv = [
        "--db",
        str(tmp_path / "library.sqlite3"),
        "sync-jabref",
        str(library_file),
        "--output",
        str(enriched_file),
        "--no-resolve",
    ]

    captured = io.StringIO()
    with (
        patch("citegeist.cli.BibliographyStore", return_value=FakeStore()),
        patch("citegeist.cli.render_bibtex", return_value="@article{seed2024,\n title = {Seed}\n}"),
        patch("citegeist.cli._resolve_one") as mocked_resolve,
        redirect_stdout(captured),
    ):
        exit_code = main(argv)

    assert exit_code == 0
    mocked_resolve.assert_not_called()

    summary = json.loads(captured.getvalue())
    assert summary["skipped_resolution"] is True
    assert summary["resolved_count"] == 0
|
||||
|
||||
|
||||
def test_cli_sync_jabref_can_annotate_review_fields_and_write_in_place(tmp_path: Path):
    """--in-place --annotate-review rewrites the input file with x_citegeist_* sidecar fields."""
    library_file = tmp_path / "jabref-library.bib"
    library_file.write_text("@article{seed2024, title = {Seed}}\n", encoding="utf-8")

    class FakeStore:
        """Stand-in store with one open conflict and one provenance record."""

        def ingest_bibtex(self, text: str, source_label: str, review_status: str) -> list[str]:
            return ["seed2024"]

        def get_bib_entry(self, citation_key: str):
            from citegeist.bibtex import BibEntry

            return BibEntry("article", citation_key, {"title": "Seed"})

        def get_entry(self, citation_key: str):
            return {"citation_key": citation_key, "review_status": "enriched"}

        def get_field_conflicts(self, citation_key: str, status: str | None = None):
            return [{"field_name": "title"}]

        def get_field_provenance(self, citation_key: str):
            return [{"source_label": "pubmed:pmid:12345678"}]

        def close(self) -> None:
            return None

    def fake_render(entries):
        # Echo the first entry's sidecar fields so the written file shows
        # exactly what the CLI handed to the renderer.
        first = entries[0].fields
        lines = [
            "@article{seed2024,",
            f" title = {{{first['title']}}},",
            f" x_citegeist_review_status = {{{first.get('x_citegeist_review_status', '')}}},",
            f" x_citegeist_open_conflicts = {{{first.get('x_citegeist_open_conflicts', '')}}},",
            f" x_citegeist_last_source = {{{first.get('x_citegeist_last_source', '')}}}",
            "}",
        ]
        return "\n".join(lines)

    argv = [
        "--db",
        str(tmp_path / "library.sqlite3"),
        "sync-jabref",
        str(library_file),
        "--in-place",
        "--annotate-review",
    ]

    captured = io.StringIO()
    with (
        patch("citegeist.cli.BibliographyStore", return_value=FakeStore()),
        patch("citegeist.cli.render_bibtex", side_effect=fake_render),
        patch("citegeist.cli._resolve_one", return_value=True),
        redirect_stdout(captured),
    ):
        exit_code = main(argv)

    assert exit_code == 0

    rendered = library_file.read_text(encoding="utf-8")
    assert "x_citegeist_review_status" in rendered
    assert "x_citegeist_open_conflicts" in rendered
    assert "x_citegeist_last_source" in rendered

    summary = json.loads(captured.getvalue())
    assert summary["in_place"] is True
    assert summary["annotated_review"] is True
|
||||
|
||||
|
||||
def test_cli_resolve_updates_entry(tmp_path: Path):
|
||||
bib_path = tmp_path / "input.bib"
|
||||
bib_path.write_text(
|
||||
|
|
|
|||
Loading…
Reference in New Issue