Add misc re-enrichment mode
This commit is contained in:
parent
753b8a2ccf
commit
ae68ceaa3c
|
|
@ -134,6 +134,7 @@ PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 bootstrap --to
|
|||
PYTHONPATH=src .venv/bin/python -m citegeist extract references.txt --output draft.bib
|
||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve smith2024graphs
|
||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --preview --limit 25
|
||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --all-misc --limit 25
|
||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 topics
|
||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 topic-entries abiogenesis
|
||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 export-topic abiogenesis --output abiogenesis.bib
|
||||
|
|
|
|||
|
|
@ -199,6 +199,18 @@ Enrich DOI-bearing placeholder records inside one topic slice:
|
|||
.venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --topic artificial-life --limit 25
|
||||
```
|
||||
|
||||
Preview all current `@misc` entries with DOIs, not just placeholder-like stubs:
|
||||
|
||||
```bash
|
||||
.venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --all-misc --preview --limit 25
|
||||
```
|
||||
|
||||
Re-enrich all current `@misc` entries with DOIs:
|
||||
|
||||
```bash
|
||||
.venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --all-misc --limit 25
|
||||
```
|
||||
|
||||
## Explore Citation Graphs
|
||||
|
||||
Purpose: traverse citation edges, export graph data, and render quick visualizations.
|
||||
|
|
|
|||
|
|
@ -82,6 +82,11 @@ def build_parser() -> argparse.ArgumentParser:
|
|||
action="store_true",
|
||||
help="Only consider candidates that already have a DOI",
|
||||
)
|
||||
resolve_stubs_parser.add_argument(
|
||||
"--all-misc",
|
||||
action="store_true",
|
||||
help="Consider all stored @misc entries instead of only placeholder-like stub records",
|
||||
)
|
||||
resolve_stubs_parser.add_argument(
|
||||
"--topic",
|
||||
help="Optional topic slug to limit candidate selection",
|
||||
|
|
@ -533,7 +538,7 @@ def main(argv: list[str] | None = None) -> int:
|
|||
if args.command == "resolve":
|
||||
return _run_resolve(store, args.citation_keys)
|
||||
if args.command == "resolve-stubs":
|
||||
return _run_resolve_stubs(store, args.limit, args.doi_only, args.topic, args.preview)
|
||||
return _run_resolve_stubs(store, args.limit, args.doi_only, args.all_misc, args.topic, args.preview)
|
||||
if args.command == "graph":
|
||||
return _run_graph(
|
||||
store,
|
||||
|
|
@ -824,13 +829,15 @@ def _run_resolve_stubs(
|
|||
store: BibliographyStore,
|
||||
limit: int,
|
||||
doi_only: bool,
|
||||
all_misc: bool,
|
||||
topic_slug: str | None,
|
||||
preview: bool,
|
||||
) -> int:
|
||||
candidates = store.list_resolution_candidates(
|
||||
limit=limit,
|
||||
doi_only=doi_only,
|
||||
stub_only=True,
|
||||
stub_only=not all_misc,
|
||||
misc_only=all_misc,
|
||||
topic_slug=topic_slug,
|
||||
)
|
||||
if preview:
|
||||
|
|
|
|||
|
|
@ -292,7 +292,7 @@ def merge_entries_with_conflicts(base: BibEntry, resolved: BibEntry) -> tuple[Bi
|
|||
merged_fields[key] = value
|
||||
return (
|
||||
BibEntry(
|
||||
entry_type=base.entry_type or resolved.entry_type,
|
||||
entry_type=_merged_entry_type(base.entry_type, resolved.entry_type),
|
||||
citation_key=base.citation_key,
|
||||
fields=merged_fields,
|
||||
),
|
||||
|
|
@ -310,6 +310,12 @@ def _is_placeholder_value(field_name: str, value: str) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def _merged_entry_type(base_entry_type: str, resolved_entry_type: str) -> str:
|
||||
if base_entry_type == "misc" and resolved_entry_type and resolved_entry_type != "misc":
|
||||
return resolved_entry_type
|
||||
return base_entry_type or resolved_entry_type
|
||||
|
||||
|
||||
def _crossref_message_to_entry(message: dict) -> BibEntry:
|
||||
entry_type = _crossref_type_to_bibtype(message.get("type", "article"))
|
||||
title_values = message.get("title", [])
|
||||
|
|
|
|||
|
|
@ -472,6 +472,7 @@ class BibliographyStore:
|
|||
limit: int = 50,
|
||||
doi_only: bool = False,
|
||||
stub_only: bool = False,
|
||||
misc_only: bool = False,
|
||||
topic_slug: str | None = None,
|
||||
) -> list[dict[str, object]]:
|
||||
clauses: list[str] = []
|
||||
|
|
@ -489,6 +490,9 @@ class BibliographyStore:
|
|||
if doi_only:
|
||||
clauses.append("e.doi IS NOT NULL AND TRIM(e.doi) <> ''")
|
||||
|
||||
if misc_only:
|
||||
clauses.append("e.entry_type = 'misc'")
|
||||
|
||||
if stub_only:
|
||||
clauses.append(
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -218,6 +218,34 @@ def test_cli_resolve_stubs_preview_lists_doi_stub_candidates(tmp_path: Path):
|
|||
assert payload[0]["title"] == "Referenced work 6"
|
||||
|
||||
|
||||
def test_cli_resolve_stubs_preview_can_target_all_misc_entries(tmp_path: Path):
    """``resolve-stubs --doi-only --all-misc --preview`` lists only @misc entries.

    Ingests one titled ``@misc`` record and one ``@article`` record, both
    with DOIs, and checks that the preview payload contains the ``@misc``
    record alone.
    """
    bib_path = tmp_path / "input.bib"
    bib_path.write_text(
        """
@misc{miscwithtitle,
  author = {Doe, Alex},
  title = {Avida Conference Record},
  year = {2005},
  doi = {10.1117/12.512613}
}

@article{complete,
  author = {Smith, Jane},
  title = {Complete Record},
  year = {2024},
  doi = {10.1000/complete}
}
""",
        encoding="utf-8",
    )
    assert run_cli(tmp_path, "ingest", str(bib_path)).returncode == 0

    result = run_cli(tmp_path, "resolve-stubs", "--doi-only", "--all-misc", "--preview", "--limit", "10")
    assert result.returncode == 0
    payload = json.loads(result.stdout)
    # The @article record also has a DOI but must be filtered out by --all-misc.
    assert [row["citation_key"] for row in payload] == ["miscwithtitle"]
|
||||
|
||||
|
||||
def test_cli_resolve_stubs_enriches_matching_candidates(tmp_path: Path):
|
||||
bib_path = tmp_path / "input.bib"
|
||||
bib_path.write_text(
|
||||
|
|
@ -271,6 +299,63 @@ def test_cli_resolve_stubs_enriches_matching_candidates(tmp_path: Path):
|
|||
assert payload["review_status"] == "enriched"
|
||||
|
||||
|
||||
def test_cli_resolve_stubs_can_enrich_all_misc_entries(tmp_path: Path):
    """``resolve-stubs --doi-only --all-misc`` re-enriches a titled @misc entry.

    With the resolver patched to return an ``inproceedings`` record, the
    stored entry is expected to take the resolver's entry type and
    ``booktitle``, keep its own title, and record the differing title as a
    field conflict.
    """
    bib_path = tmp_path / "input.bib"
    bib_path.write_text(
        """
@misc{miscwithtitle,
  author = {Doe, Alex},
  title = {Avida Conference Record},
  year = {2005},
  doi = {10.1117/12.512613}
}
""",
        encoding="utf-8",
    )
    assert run_cli(tmp_path, "ingest", str(bib_path)).returncode == 0

    from citegeist.bibtex import BibEntry
    from citegeist.resolve import Resolution

    database = tmp_path / "library.sqlite3"

    # main() is called in-process so the patch on resolve_entry applies.
    # NOTE(review): run_cli presumably runs in a separate process, where the
    # patch would not be visible - confirm against the helper.
    with patch("citegeist.cli.MetadataResolver.resolve_entry") as mocked_resolve:
        mocked_resolve.return_value = Resolution(
            entry=BibEntry(
                entry_type="inproceedings",
                citation_key="resolvedkey",
                fields={
                    "author": "Koza, J. R.",
                    "title": "Genetic Programming IV: Routine Human-Competitive Machine Intelligence",
                    "year": "2005",
                    "booktitle": "Genetic and Evolutionary Computation Conference",
                    "doi": "10.1117/12.512613",
                },
            ),
            source_type="resolver",
            source_label="crossref:doi:10.1117/12.512613",
        )
        exit_code = main(
            [
                "--db",
                str(database),
                "resolve-stubs",
                "--doi-only",
                "--all-misc",
                "--limit",
                "10",
            ]
        )

    assert exit_code == 0
    show = run_cli(tmp_path, "show", "--conflicts", "miscwithtitle")
    # Fail on the exit status rather than on a JSON decode error below.
    assert show.returncode == 0
    payload = json.loads(show.stdout)
    assert payload["entry_type"] == "inproceedings"
    assert payload["title"] == "Avida Conference Record"
    assert payload["booktitle"] == "Genetic and Evolutionary Computation Conference"
    assert "title" in {item["field_name"] for item in payload["field_conflicts"]}
|
||||
|
||||
|
||||
def test_cli_resolve_conflicts_updates_status(tmp_path: Path):
|
||||
bib_path = tmp_path / "input.bib"
|
||||
bib_path.write_text(
|
||||
|
|
|
|||
|
|
@ -128,6 +128,30 @@ def test_merge_entries_replaces_placeholder_titles_without_conflict():
|
|||
assert conflicts == []
|
||||
|
||||
|
||||
def test_merge_entries_upgrades_misc_type_when_resolver_has_better_type():
    """Merging a ``misc`` base with an ``inproceedings`` result upgrades the type.

    The base title differs from the resolved title, so the merge is also
    expected to report exactly one conflict, on ``title``.
    """
    base = BibEntry(
        entry_type="misc",
        citation_key="miscwithtitle",
        fields={"title": "Avida Conference Record", "doi": "10.1117/12.512613"},
    )
    resolved = BibEntry(
        entry_type="inproceedings",
        citation_key="resolved",
        fields={"title": "Genetic Programming IV", "booktitle": "GECCO"},
    )

    merged, conflicts = merge_entries_with_conflicts(base, resolved)

    assert merged.entry_type == "inproceedings"
    assert conflicts == [
        {
            "field_name": "title",
            "current_value": "Avida Conference Record",
            "proposed_value": "Genetic Programming IV",
        }
    ]
|
||||
|
||||
|
||||
def test_resolver_tries_doi_before_dblp():
|
||||
resolver = MetadataResolver()
|
||||
calls: list[tuple[str, str]] = []
|
||||
|
|
|
|||
|
|
@ -354,6 +354,33 @@ def test_store_lists_stub_resolution_candidates():
|
|||
store.close()
|
||||
|
||||
|
||||
def test_store_can_list_all_misc_resolution_candidates():
    """``list_resolution_candidates(misc_only=True)`` returns only @misc entries.

    Ingests one ``@misc`` and one ``@article`` record, both with DOIs, and
    checks that combining ``doi_only`` with ``misc_only`` yields the
    ``@misc`` record alone.
    """
    store = BibliographyStore()
    try:
        store.ingest_bibtex(
            """
@misc{miscwithtitle,
  author = {Doe, Alex},
  title = {Avida Conference Record},
  year = {2005},
  doi = {10.1117/12.512613}
}

@article{complete,
  author = {Smith, Jane},
  title = {Complete Record},
  year = {2024},
  doi = {10.1000/complete}
}
"""
        )

        candidates = store.list_resolution_candidates(limit=10, doi_only=True, misc_only=True)
        assert [row["citation_key"] for row in candidates] == ["miscwithtitle"]
    finally:
        # Close the store even when an assertion fails.
        store.close()
|
||||
|
||||
|
||||
def test_store_can_stage_and_review_topic_phrase_suggestion():
|
||||
store = BibliographyStore()
|
||||
try:
|
||||
|
|
|
|||
Loading…
Reference in New Issue