Improve bootstrap preview results
This commit is contained in:
parent
c0fe9de6f0
commit
1ae42ec2c4
|
|
@ -15,6 +15,10 @@ class BootstrapResult:
|
||||||
origin: str
|
origin: str
|
||||||
created: bool
|
created: bool
|
||||||
score: float = 0.0
|
score: float = 0.0
|
||||||
|
title: str = ""
|
||||||
|
author: str = ""
|
||||||
|
year: str = ""
|
||||||
|
abstract: str = ""
|
||||||
|
|
||||||
|
|
||||||
class Bootstrapper:
|
class Bootstrapper:
|
||||||
|
|
@ -57,7 +61,17 @@ class Bootstrapper:
|
||||||
review_status=review_status,
|
review_status=review_status,
|
||||||
)
|
)
|
||||||
seed_keys.append(entry.citation_key)
|
seed_keys.append(entry.citation_key)
|
||||||
results.append(BootstrapResult(entry.citation_key, "seed_bibtex", created))
|
results.append(
|
||||||
|
BootstrapResult(
|
||||||
|
entry.citation_key,
|
||||||
|
"seed_bibtex",
|
||||||
|
created,
|
||||||
|
title=entry.fields.get("title", ""),
|
||||||
|
author=entry.fields.get("author", ""),
|
||||||
|
year=entry.fields.get("year", ""),
|
||||||
|
abstract=entry.fields.get("abstract", ""),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
if topic:
|
if topic:
|
||||||
if not preview_only and (topic_slug or topic_name or topic_phrase):
|
if not preview_only and (topic_slug or topic_name or topic_phrase):
|
||||||
|
|
@ -67,7 +81,8 @@ class Bootstrapper:
|
||||||
source_type="bootstrap",
|
source_type="bootstrap",
|
||||||
expansion_phrase=topic_phrase or topic,
|
expansion_phrase=topic_phrase or topic,
|
||||||
)
|
)
|
||||||
ranked_candidates = self._topic_candidates(topic, seed_keys, topic_limit)
|
candidate_limit = max(topic_limit, topic_commit_limit or 0)
|
||||||
|
ranked_candidates = self._topic_candidates(topic, seed_keys, candidate_limit)
|
||||||
if topic_commit_limit is not None:
|
if topic_commit_limit is not None:
|
||||||
ranked_candidates = ranked_candidates[:topic_commit_limit]
|
ranked_candidates = ranked_candidates[:topic_commit_limit]
|
||||||
|
|
||||||
|
|
@ -82,7 +97,18 @@ class Bootstrapper:
|
||||||
review_status=review_status,
|
review_status=review_status,
|
||||||
)
|
)
|
||||||
seed_keys.append(entry.citation_key)
|
seed_keys.append(entry.citation_key)
|
||||||
results.append(BootstrapResult(entry.citation_key, "topic", created, score=score))
|
results.append(
|
||||||
|
BootstrapResult(
|
||||||
|
entry.citation_key,
|
||||||
|
"topic",
|
||||||
|
created,
|
||||||
|
score=score,
|
||||||
|
title=entry.fields.get("title", ""),
|
||||||
|
author=entry.fields.get("author", ""),
|
||||||
|
year=entry.fields.get("year", ""),
|
||||||
|
abstract=entry.fields.get("abstract", ""),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
if expand and not preview_only:
|
if expand and not preview_only:
|
||||||
expanded_keys = list(dict.fromkeys(seed_keys))
|
expanded_keys = list(dict.fromkeys(seed_keys))
|
||||||
|
|
|
||||||
|
|
@ -87,6 +87,44 @@ def test_bootstrap_cli_accepts_seed_and_topic(tmp_path):
|
||||||
assert exit_code == 0
|
assert exit_code == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_bootstrap_cli_preview_outputs_candidate_metadata(tmp_path, capsys):
|
||||||
|
from unittest.mock import patch
|
||||||
|
from citegeist.bootstrap import BootstrapResult
|
||||||
|
|
||||||
|
database = tmp_path / "library.sqlite3"
|
||||||
|
with patch("citegeist.cli.Bootstrapper.bootstrap") as mocked_bootstrap:
|
||||||
|
mocked_bootstrap.return_value = [
|
||||||
|
BootstrapResult(
|
||||||
|
citation_key="openalexw123",
|
||||||
|
origin="topic",
|
||||||
|
created=True,
|
||||||
|
score=4.0,
|
||||||
|
title="Artificial Life and Adaptive Behavior",
|
||||||
|
author="Langton, Christopher G.",
|
||||||
|
year="1989",
|
||||||
|
abstract="A foundational overview of artificial life systems.",
|
||||||
|
)
|
||||||
|
]
|
||||||
|
exit_code = main(
|
||||||
|
[
|
||||||
|
"--db",
|
||||||
|
str(database),
|
||||||
|
"bootstrap",
|
||||||
|
"--topic",
|
||||||
|
"artificial life",
|
||||||
|
"--preview",
|
||||||
|
"--topic-commit-limit",
|
||||||
|
"50",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
assert exit_code == 0
|
||||||
|
payload = capsys.readouterr().out
|
||||||
|
assert "Artificial Life and Adaptive Behavior" in payload
|
||||||
|
assert "Langton, Christopher G." in payload
|
||||||
|
assert "A foundational overview of artificial life systems." in payload
|
||||||
|
|
||||||
|
|
||||||
def test_bootstrap_ranks_and_deduplicates_topic_candidates():
|
def test_bootstrap_ranks_and_deduplicates_topic_candidates():
|
||||||
store = BibliographyStore()
|
store = BibliographyStore()
|
||||||
try:
|
try:
|
||||||
|
|
@ -140,6 +178,7 @@ def test_bootstrap_preview_does_not_write_to_database():
|
||||||
results = bootstrapper.bootstrap(store, topic="graph topic", expand=False, preview_only=True)
|
results = bootstrapper.bootstrap(store, topic="graph topic", expand=False, preview_only=True)
|
||||||
|
|
||||||
assert [item.citation_key for item in results] == ["preview2024graph"]
|
assert [item.citation_key for item in results] == ["preview2024graph"]
|
||||||
|
assert results[0].title == "Preview Graph Topic"
|
||||||
assert store.get_entry("preview2024graph") is None
|
assert store.get_entry("preview2024graph") is None
|
||||||
finally:
|
finally:
|
||||||
store.close()
|
store.close()
|
||||||
|
|
@ -173,3 +212,50 @@ def test_bootstrap_topic_commit_limit_restricts_persisted_candidates():
|
||||||
assert store.get_entry("rank2") is None
|
assert store.get_entry("rank2") is None
|
||||||
finally:
|
finally:
|
||||||
store.close()
|
store.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_bootstrap_preview_uses_topic_commit_limit_when_larger_than_topic_limit():
|
||||||
|
store = BibliographyStore()
|
||||||
|
try:
|
||||||
|
bootstrapper = Bootstrapper()
|
||||||
|
from citegeist import BibEntry
|
||||||
|
|
||||||
|
bootstrapper.resolver.search_openalex = lambda topic, limit=5: [ # type: ignore[method-assign]
|
||||||
|
BibEntry(
|
||||||
|
entry_type="article",
|
||||||
|
citation_key=f"rank{index}",
|
||||||
|
fields={
|
||||||
|
"title": f"Preview Topic Result {index}",
|
||||||
|
"author": f"Author, {index}",
|
||||||
|
"year": f"20{index:02d}",
|
||||||
|
"abstract": f"Abstract {index}",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
for index in range(1, 8)
|
||||||
|
][:limit]
|
||||||
|
bootstrapper.resolver.search_crossref = lambda topic, limit=5: [] # type: ignore[method-assign]
|
||||||
|
bootstrapper.resolver.search_datacite = lambda topic, limit=5: [] # type: ignore[method-assign]
|
||||||
|
|
||||||
|
results = bootstrapper.bootstrap(
|
||||||
|
store,
|
||||||
|
topic="graph topic",
|
||||||
|
expand=False,
|
||||||
|
preview_only=True,
|
||||||
|
topic_limit=5,
|
||||||
|
topic_commit_limit=7,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert [item.citation_key for item in results] == [
|
||||||
|
"rank1",
|
||||||
|
"rank2",
|
||||||
|
"rank3",
|
||||||
|
"rank4",
|
||||||
|
"rank5",
|
||||||
|
"rank6",
|
||||||
|
"rank7",
|
||||||
|
]
|
||||||
|
assert results[0].author == "Author, 1"
|
||||||
|
assert results[0].year == "2001"
|
||||||
|
assert results[0].abstract == "Abstract 1"
|
||||||
|
finally:
|
||||||
|
store.close()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue