# CiteGeist/tests/test_openlibrary.py
# (file-viewer metadata: 189 lines, 7.7 KiB, Python)

from __future__ import annotations

import copy

from citegeist.bibtex import BibEntry
from citegeist.resolve import MetadataResolver
from citegeist.sources import OpenLibrarySource, SourceRegistry, list_source_catalog
class FakeSourceClient:
    """Test double for the client object handed to a source as ``source_client``.

    It serves one canned JSON payload for every request, so a source under
    test can be exercised without a real client.
    """

    def __init__(self, payload: dict[str, object]) -> None:
        """Remember *payload* as the response for all later requests."""
        self.payload = payload

    def try_get_json(self, _url: str) -> dict[str, object]:
        """Return an independent copy of the canned payload.

        The requested URL is ignored. The copy is deep, so a caller that
        mutates nested lists or dicts in the response cannot alter the
        fixture seen by later calls; a shallow ``dict(...)`` copy would share
        those nested objects. The result is always a plain ``dict``.
        """
        return copy.deepcopy(dict(self.payload))
def test_openlibrary_source_normalizes_book_record() -> None:
    """Check that ``OpenLibrarySource.normalize`` turns a raw record into a book.

    The raw record below carries list values for ``author_name``,
    ``publisher``, ``edition_key`` and ``isbn`` and an integer year; the
    normalized entry is expected to expose each of them as a single string.
    """
    source = OpenLibrarySource(config={"source_client": FakeSourceClient({})})
    raw_record = {
        "title": "The Nature of the Stratigraphic Record",
        "author_name": ["D. V. Ager"],
        "first_publish_year": 1973,
        "publisher": ["Macmillan"],
        "key": "/works/OL82563W",
        "edition_key": ["OL12345M"],
        "isbn": ["9781234567890"],
    }
    # Expected BibTeX-side field values, checked in this order.
    expected_fields = {
        "title": "The Nature of the Stratigraphic Record",
        "author": "D. V. Ager",
        "year": "1973",
        "publisher": "Macmillan",
        "openlibrary_work": "/works/OL82563W",
        "openlibrary_edition": "OL12345M",
        "isbn": "9781234567890",
    }

    entry = source.normalize(raw_record)

    assert entry is not None
    assert entry.entry_type == "book"
    for field_name, expected_value in expected_fields.items():
        assert entry.fields[field_name] == expected_value
def test_openlibrary_registry_and_catalog() -> None:
    """Check registry construction and the catalog listing for Open Library.

    A registry configured with an enabled ``openlibrary`` source must return
    an ``OpenLibrarySource`` for that name, and the source catalog must list
    ``open_library`` as integrated with the ``book_metadata`` capability.
    """
    openlibrary_config = {
        "source_type": "openlibrary",
        "enabled": True,
    }
    registry = SourceRegistry()
    registry.from_config_dict({"sources": {"openlibrary": openlibrary_config}})
    assert isinstance(registry.get("openlibrary"), OpenLibrarySource)

    catalog_by_key = {}
    for item in list_source_catalog():
        catalog_by_key[item.key] = item
    # NOTE: the catalog key is spelled "open_library", unlike the registry
    # source name "openlibrary" used above.
    open_library = catalog_by_key["open_library"]
    assert open_library.current_status == "integrated"
    assert "book_metadata" in open_library.capabilities
def test_metadata_resolver_uses_openlibrary_after_other_searches_fail() -> None:
    """Check that a book resolves via Open Library when nothing else hits.

    All other searches on the resolver are stubbed to return no candidates;
    the Open Library search returns one candidate whose title, author and
    year equal those of the seed entry.
    """

    def no_candidates(_title: str, limit: int = 5) -> list[BibEntry]:
        return []

    def openlibrary_candidates(_title: str, limit: int = 5) -> list[BibEntry]:
        candidate = BibEntry(
            entry_type="book",
            citation_key="olworks123",
            fields={
                "title": "The Nature of the Stratigraphic Record",
                "author": "D. V. Ager",
                "year": "1973",
                "openlibrary_work": "/works/OL82563W",
            },
        )
        return [candidate]

    resolver = MetadataResolver()
    resolver.search_crossref = no_candidates  # type: ignore[method-assign]
    resolver.search_datacite = no_candidates  # type: ignore[method-assign]
    resolver.search_openalex = no_candidates  # type: ignore[method-assign]
    resolver.search_pubmed = no_candidates  # type: ignore[method-assign]
    resolver.search_europepmc = no_candidates  # type: ignore[method-assign]
    resolver.search_semanticscholar = no_candidates  # type: ignore[method-assign]
    resolver.search_openlibrary = openlibrary_candidates  # type: ignore[method-assign]

    seed = BibEntry(
        entry_type="book",
        citation_key="seed1973",
        fields={
            "title": "The Nature of the Stratigraphic Record",
            "author": "D. V. Ager",
            "year": "1973",
        },
    )
    result = resolver.resolve_entry(seed)

    assert result is not None
    assert (
        result.source_label
        == "openlibrary:search:The Nature of the Stratigraphic Record"
    )
def test_metadata_resolver_trace_records_fallback_attempts() -> None:
    """Check the attempt trace produced when Open Library supplies the match.

    With every other search stubbed to return nothing, the trace must end
    with a ``semanticscholar`` attempt followed by an ``openlibrary`` attempt,
    and that final attempt must be marked as matched with one candidate.
    """

    def no_candidates(_title: str, limit: int = 5) -> list[BibEntry]:
        return []

    def openlibrary_candidates(_title: str, limit: int = 5) -> list[BibEntry]:
        return [
            BibEntry(
                entry_type="book",
                citation_key="olworks123",
                fields={"title": "Example Book", "author": "Author, A", "year": "1980"},
            )
        ]

    resolver = MetadataResolver()
    resolver.search_crossref = no_candidates  # type: ignore[method-assign]
    resolver.search_datacite = no_candidates  # type: ignore[method-assign]
    resolver.search_openalex = no_candidates  # type: ignore[method-assign]
    resolver.search_pubmed = no_candidates  # type: ignore[method-assign]
    resolver.search_europepmc = no_candidates  # type: ignore[method-assign]
    resolver.search_semanticscholar = no_candidates  # type: ignore[method-assign]
    resolver.search_openlibrary = openlibrary_candidates  # type: ignore[method-assign]

    seed = BibEntry(
        entry_type="book",
        citation_key="seed1980",
        fields={"title": "Example Book", "author": "Author, A", "year": "1980"},
    )
    outcome = resolver.resolve_entry_with_trace(seed)

    assert outcome.resolution is not None
    assert outcome.resolution.source_label == "openlibrary:search:Example Book"

    trailing_sources = [attempt.source_name for attempt in outcome.attempts[-2:]]
    assert trailing_sources == ["semanticscholar", "openlibrary"]

    final_attempt = outcome.attempts[-1]
    assert final_attempt.matched is True
    assert final_attempt.candidate_count == 1
def test_metadata_resolver_uses_fuzzy_catalog_match_for_book_titles() -> None:
    """Check that a near-identical Open Library title still resolves a book.

    The candidate title differs from the seed title in letter case and in
    "stratigraphical" versus "Stratigraphic"; author and year are equal. All
    other searches are stubbed to return no candidates.
    """

    def no_candidates(_title: str, limit: int = 5) -> list[BibEntry]:
        return []

    def openlibrary_candidates(_title: str, limit: int = 5) -> list[BibEntry]:
        near_match = BibEntry(
            entry_type="book",
            citation_key="olworks123",
            fields={
                "title": "The nature of the stratigraphical record",
                "author": "D. V. Ager",
                "year": "1973",
                "openlibrary_work": "/works/OL82563W",
            },
        )
        return [near_match]

    resolver = MetadataResolver()
    resolver.search_crossref = no_candidates  # type: ignore[method-assign]
    resolver.search_datacite = no_candidates  # type: ignore[method-assign]
    resolver.search_openalex = no_candidates  # type: ignore[method-assign]
    resolver.search_pubmed = no_candidates  # type: ignore[method-assign]
    resolver.search_europepmc = no_candidates  # type: ignore[method-assign]
    resolver.search_semanticscholar = no_candidates  # type: ignore[method-assign]
    resolver.search_openlibrary = openlibrary_candidates  # type: ignore[method-assign]

    seed = BibEntry(
        entry_type="book",
        citation_key="seed1973",
        fields={
            "title": "The Nature of the Stratigraphic Record",
            "author": "D. V. Ager",
            "year": "1973",
        },
    )
    result = resolver.resolve_entry(seed)

    assert result is not None
    # The label is expected to carry the seed's title, not the candidate's.
    assert (
        result.source_label
        == "openlibrary:search:The Nature of the Stratigraphic Record"
    )
def test_metadata_resolver_skips_openlibrary_for_article_like_entries() -> None:
    """Check that Open Library is never consulted for an article seed entry.

    The seed is an ``article`` with a ``journal`` field. Every search is
    stubbed to return nothing, and the Open Library stub records whether it
    was called; the test requires no resolution, no call to that stub, and no
    ``openlibrary`` attempt in the trace.
    """
    called = {"openlibrary": False}

    def no_candidates(_title: str, limit: int = 5) -> list[BibEntry]:
        return []

    def fake_openlibrary(_title: str, limit: int = 5) -> list[BibEntry]:
        # Flip the flag so the test can detect any invocation.
        called["openlibrary"] = True
        return []

    resolver = MetadataResolver()
    resolver.search_crossref = no_candidates  # type: ignore[method-assign]
    resolver.search_datacite = no_candidates  # type: ignore[method-assign]
    resolver.search_openalex = no_candidates  # type: ignore[method-assign]
    resolver.search_pubmed = no_candidates  # type: ignore[method-assign]
    resolver.search_europepmc = no_candidates  # type: ignore[method-assign]
    resolver.search_semanticscholar = no_candidates  # type: ignore[method-assign]
    resolver.search_openlibrary = fake_openlibrary  # type: ignore[method-assign]

    article_seed = BibEntry(
        entry_type="article",
        citation_key="seed1977",
        fields={
            "title": "Fast locomotion of some African ungulates",
            "author": "Alexander, R. M.",
            "year": "1977",
            "journal": "Journal of Zoology",
        },
    )
    outcome = resolver.resolve_entry_with_trace(article_seed)

    assert outcome.resolution is None
    assert called["openlibrary"] is False
    assert not any(
        attempt.source_name == "openlibrary" for attempt in outcome.attempts
    )