189 lines
7.7 KiB
Python
189 lines
7.7 KiB
Python
from __future__ import annotations
|
|
|
|
from citegeist.bibtex import BibEntry
|
|
from citegeist.resolve import MetadataResolver
|
|
from citegeist.sources import OpenLibrarySource, SourceRegistry, list_source_catalog
|
|
|
|
|
|
class FakeSourceClient:
    """In-memory stand-in for a source HTTP client used by these tests.

    It is handed to ``OpenLibrarySource`` through the ``"source_client"``
    config key and answers every JSON request with the same canned payload.
    """

    def __init__(self, payload: dict[str, object]) -> None:
        """Store the canned *payload* that every request will answer with."""
        self.payload = payload

    def try_get_json(self, _url: str) -> dict[str, object]:
        """Return a shallow copy of the canned payload; the URL is ignored.

        A copy is returned so that a caller adding or removing top-level keys
        on the result does not alter the payload seen by later calls.
        """
        return dict(self.payload)
|
|
|
|
|
|
def test_openlibrary_source_normalizes_book_record() -> None:
    """``OpenLibrarySource.normalize`` turns a raw record into a ``book`` entry.

    The list-valued inputs (``author_name``, ``publisher``, ``edition_key``,
    ``isbn``) each hold a single item here, and the integer
    ``first_publish_year`` is expected to come back as the string ``"1973"``.
    """
    # The fake client returns an empty payload; normalize() is fed the record
    # directly, so no lookup result is needed for this test.
    source = OpenLibrarySource(config={"source_client": FakeSourceClient({})})
    raw_record = {
        "title": "The Nature of the Stratigraphic Record",
        "author_name": ["D. V. Ager"],
        "first_publish_year": 1973,
        "publisher": ["Macmillan"],
        "key": "/works/OL82563W",
        "edition_key": ["OL12345M"],
        "isbn": ["9781234567890"],
    }

    entry = source.normalize(raw_record)

    assert entry is not None
    assert entry.entry_type == "book"
    assert entry.fields["title"] == "The Nature of the Stratigraphic Record"
    assert entry.fields["author"] == "D. V. Ager"
    assert entry.fields["year"] == "1973"
    assert entry.fields["publisher"] == "Macmillan"
    assert entry.fields["openlibrary_work"] == "/works/OL82563W"
    assert entry.fields["openlibrary_edition"] == "OL12345M"
    assert entry.fields["isbn"] == "9781234567890"
|
|
|
|
|
|
def test_openlibrary_registry_and_catalog() -> None:
    """Open Library is constructible via the registry and listed in the catalog.

    Two things are checked: a registry configured with ``source_type``
    ``"openlibrary"`` hands back an ``OpenLibrarySource``, and the source
    catalog reports the entry as integrated with book-metadata capability.
    """
    registry = SourceRegistry()
    registry.from_config_dict(
        {
            "sources": {
                "openlibrary": {
                    "source_type": "openlibrary",
                    "enabled": True,
                }
            }
        }
    )

    source = registry.get("openlibrary")
    assert isinstance(source, OpenLibrarySource)

    # Note the differing spellings: the registry entry above is named
    # "openlibrary", while the catalog entry is keyed "open_library".
    catalog = {entry.key: entry for entry in list_source_catalog()}
    open_library = catalog["open_library"]
    assert open_library.current_status == "integrated"
    assert "book_metadata" in open_library.capabilities
|
|
|
|
|
|
def test_metadata_resolver_uses_openlibrary_after_other_searches_fail() -> None:
    """A book entry resolves through Open Library when other searches are empty.

    Every other search method is stubbed to return no candidates, and the
    Open Library search returns one entry whose title, author and year match
    the seed exactly. The resolution must be labelled as an Open Library
    title search.
    """
    resolver = MetadataResolver()

    # Stub out the non-Open-Library searches so none of them yields a candidate.
    for stub_name in (
        "search_crossref",
        "search_datacite",
        "search_openalex",
        "search_pubmed",
        "search_europepmc",
        "search_semanticscholar",
    ):
        setattr(resolver, stub_name, lambda _title, limit=5: [])

    resolver.search_openlibrary = lambda _title, limit=5: [  # type: ignore[method-assign]
        BibEntry(
            entry_type="book",
            citation_key="olworks123",
            fields={
                "title": "The Nature of the Stratigraphic Record",
                "author": "D. V. Ager",
                "year": "1973",
                "openlibrary_work": "/works/OL82563W",
            },
        )
    ]

    seed = BibEntry(
        entry_type="book",
        citation_key="seed1973",
        fields={"title": "The Nature of the Stratigraphic Record", "author": "D. V. Ager", "year": "1973"},
    )
    result = resolver.resolve_entry(seed)

    assert result is not None
    assert result.source_label == "openlibrary:search:The Nature of the Stratigraphic Record"
|
|
|
|
|
|
def test_metadata_resolver_trace_records_fallback_attempts() -> None:
    """``resolve_entry_with_trace`` records the fallback to Open Library.

    With every other search stubbed to return nothing, the last two recorded
    attempts must be ``semanticscholar`` followed by ``openlibrary``, and the
    final attempt must be marked as matched with exactly one candidate.
    """
    resolver = MetadataResolver()

    # Stub out the non-Open-Library searches so none of them yields a candidate.
    for stub_name in (
        "search_crossref",
        "search_datacite",
        "search_openalex",
        "search_pubmed",
        "search_europepmc",
        "search_semanticscholar",
    ):
        setattr(resolver, stub_name, lambda _title, limit=5: [])

    resolver.search_openlibrary = lambda _title, limit=5: [  # type: ignore[method-assign]
        BibEntry(
            entry_type="book",
            citation_key="olworks123",
            fields={"title": "Example Book", "author": "Author, A", "year": "1980"},
        )
    ]

    seed = BibEntry(
        entry_type="book",
        citation_key="seed1980",
        fields={"title": "Example Book", "author": "Author, A", "year": "1980"},
    )
    outcome = resolver.resolve_entry_with_trace(seed)

    assert outcome.resolution is not None
    assert outcome.resolution.source_label == "openlibrary:search:Example Book"

    trailing_sources = [attempt.source_name for attempt in outcome.attempts[-2:]]
    assert trailing_sources == ["semanticscholar", "openlibrary"]

    final_attempt = outcome.attempts[-1]
    assert final_attempt.matched is True
    assert final_attempt.candidate_count == 1
|
|
|
|
|
|
def test_metadata_resolver_uses_fuzzy_catalog_match_for_book_titles() -> None:
    """A near-identical Open Library title still resolves a book entry.

    The candidate title differs from the seed in letter case and in one word
    ("stratigraphical" versus "Stratigraphic"); author and year are identical.
    The resolver is expected to accept it, and the source label carries the
    seed's title rather than the candidate's.
    """
    resolver = MetadataResolver()

    # Stub out the non-Open-Library searches so none of them yields a candidate.
    for stub_name in (
        "search_crossref",
        "search_datacite",
        "search_openalex",
        "search_pubmed",
        "search_europepmc",
        "search_semanticscholar",
    ):
        setattr(resolver, stub_name, lambda _title, limit=5: [])

    resolver.search_openlibrary = lambda _title, limit=5: [  # type: ignore[method-assign]
        BibEntry(
            entry_type="book",
            citation_key="olworks123",
            fields={
                "title": "The nature of the stratigraphical record",
                "author": "D. V. Ager",
                "year": "1973",
                "openlibrary_work": "/works/OL82563W",
            },
        )
    ]

    seed = BibEntry(
        entry_type="book",
        citation_key="seed1973",
        fields={"title": "The Nature of the Stratigraphic Record", "author": "D. V. Ager", "year": "1973"},
    )
    result = resolver.resolve_entry(seed)

    assert result is not None
    assert result.source_label == "openlibrary:search:The Nature of the Stratigraphic Record"
|
|
|
|
|
|
def test_metadata_resolver_skips_openlibrary_for_article_like_entries() -> None:
    """Open Library is never consulted for an ``article`` entry with a journal.

    Every other search is stubbed to return nothing, so the entry stays
    unresolved. The Open Library stub records whether it was invoked; it must
    not be called, and no ``openlibrary`` attempt may appear in the trace.
    """
    resolver = MetadataResolver()

    # Stub out the non-Open-Library searches so none of them yields a candidate.
    for stub_name in (
        "search_crossref",
        "search_datacite",
        "search_openalex",
        "search_pubmed",
        "search_europepmc",
        "search_semanticscholar",
    ):
        setattr(resolver, stub_name, lambda _title, limit=5: [])

    # Mutable flag so the nested stub can report that it was invoked.
    called = {"openlibrary": False}

    def fake_openlibrary(_title: str, limit: int = 5) -> list[BibEntry]:
        """Record the call and return no candidates."""
        called["openlibrary"] = True
        return []

    resolver.search_openlibrary = fake_openlibrary  # type: ignore[method-assign]

    seed = BibEntry(
        entry_type="article",
        citation_key="seed1977",
        fields={
            "title": "Fast locomotion of some African ungulates",
            "author": "Alexander, R. M.",
            "year": "1977",
            "journal": "Journal of Zoology",
        },
    )
    outcome = resolver.resolve_entry_with_trace(seed)

    assert outcome.resolution is None
    assert called["openlibrary"] is False
    assert all(attempt.source_name != "openlibrary" for attempt in outcome.attempts)
|