118 lines
4.6 KiB
Python
118 lines
4.6 KiB
Python
from __future__ import annotations
|
|
|
|
from citegeist.resolve import MetadataResolver
|
|
from citegeist.sources import SemanticScholarSource, SourceRegistry, list_source_catalog
|
|
|
|
|
|
def test_semanticscholar_source_normalizes_record() -> None:
    """Verify that ``SemanticScholarSource.normalize`` maps a raw record to entry fields.

    A hand-written paper record is passed through ``normalize`` and the
    resulting entry's ``fields`` mapping is compared, key by key, against
    the string values the test expects.
    """
    raw_paper = {
        "paperId": "abcdef123456",
        "title": "Physics Example",
        "year": 2024,
        "abstract": "Abstract text.",
        "authors": [{"name": "Jane Doe"}, {"name": "Alex Roe"}],
        "externalIds": {"DOI": "10.1000/physics"},
        "journal": {"name": "Physical Review Example"},
        "openAccessPdf": {"url": "https://example.org/paper.pdf"},
        "citationCount": 42,
        "publicationTypes": ["JournalArticle"],
    }
    # Expected values are all strings, including the boolean-like ``is_oa``
    # flag and the citation count that is an int in the raw record.
    expected_fields = {
        "doi": "10.1000/physics",
        "author": "Jane Doe and Alex Roe",
        "journal": "Physical Review Example",
        "url": "https://example.org/paper.pdf",
        "is_oa": "true",
        "semanticscholar_citation_count": "42",
    }

    normalized = SemanticScholarSource(config={}).normalize(raw_paper)

    assert normalized is not None
    for field_name, expected_value in expected_fields.items():
        assert normalized.fields[field_name] == expected_value
|
|
|
|
|
|
def test_semanticscholar_registry_and_catalog() -> None:
    """Verify registry construction and the catalog listing for Semantic Scholar.

    First, a registry loaded from a config dict with a ``semanticscholar``
    source must hand back a ``SemanticScholarSource`` instance. Second, the
    source catalog must list the ``semantic_scholar`` entry as integrated
    with priority ``now``.
    """
    config = {
        "sources": {
            "semanticscholar": {
                "source_type": "semanticscholar",
                "enabled": True,
            }
        }
    }
    registry = SourceRegistry()
    registry.from_config_dict(config)

    assert isinstance(registry.get("semanticscholar"), SemanticScholarSource)

    # Index the catalog by key so the entry of interest can be looked up directly.
    catalog_by_key = {}
    for item in list_source_catalog():
        catalog_by_key[item.key] = item

    # Note: the catalog key is "semantic_scholar" (with an underscore), while
    # the registry name used above is "semanticscholar".
    catalog_entry = catalog_by_key["semantic_scholar"]
    assert catalog_entry.current_status == "integrated"
    assert catalog_entry.priority == "now"
|
|
|
|
|
|
def test_metadata_resolver_uses_semanticscholar_doi_after_other_lookups_fail() -> None:
    """Verify that a DOI is resolved via Semantic Scholar when other DOI lookups return None.

    The resolver's ``resolve_doi``, ``resolve_datacite_doi`` and
    ``resolve_europepmc_doi`` methods are replaced with stubs returning
    ``None``, and the Semantic Scholar DOI lookup is replaced with a stub
    that normalizes a canned record. Resolving a seed entry carrying that
    DOI must then yield a result labelled as a Semantic Scholar DOI match.
    """
    from citegeist.bibtex import BibEntry

    raw_paper = {
        "paperId": "abcdef123456",
        "title": "Physics Example",
        "year": 2024,
        "authors": [{"name": "Jane Doe"}],
        "externalIds": {"DOI": "10.1000/physics"},
        "journal": {"name": "Physical Review Example"},
        "publicationTypes": ["JournalArticle"],
    }
    resolver = MetadataResolver()

    def _no_match(_doi):
        # Stand-in for a DOI lookup that finds nothing.
        return None

    def _canned_lookup(_doi):
        # Normalization happens at call time, through the resolver's own source.
        return resolver.semanticscholar.normalize(raw_paper)

    # Stubs are set on this resolver instance only.
    for method_name in ("resolve_doi", "resolve_datacite_doi", "resolve_europepmc_doi"):
        setattr(resolver, method_name, _no_match)
    setattr(resolver.semanticscholar, "lookup_by_doi", _canned_lookup)

    seed_entry = BibEntry(
        entry_type="article",
        citation_key="seed2024",
        fields={"doi": "10.1000/physics", "title": "Physics Example"},
    )
    result = resolver.resolve_entry(seed_entry)

    assert result is not None
    assert result.source_label == "semanticscholar:doi:10.1000/physics"
    assert result.entry.fields["journal"] == "Physical Review Example"
|
|
|
|
|
|
def test_metadata_resolver_uses_semanticscholar_title_search_after_other_searches_fail() -> None:
    """Verify that a title search is resolved via Semantic Scholar when other searches return None.

    The resolver's Crossref, DataCite, OpenAlex, PubMed and Europe PMC
    ``search_*_best_match`` methods are replaced with stubs returning
    ``None``, and the Semantic Scholar ``search`` is replaced with a stub
    returning one normalized canned record. Resolving a seed entry that has
    a title, author and year but no DOI must then yield a result labelled
    as a Semantic Scholar search match on the title.
    """
    from citegeist.bibtex import BibEntry

    raw_paper = {
        "paperId": "abcdef123456",
        "title": "Physics Example",
        "year": 2024,
        "authors": [{"name": "Jane Doe"}],
        "externalIds": {"DOI": "10.1000/physics"},
        "journal": {"name": "Physical Review Example"},
        "publicationTypes": ["JournalArticle"],
    }
    resolver = MetadataResolver()

    def _no_hit(*args, **kwargs):
        # Stand-in for a best-match search that finds nothing.
        return None

    def _canned_search(_title, limit=5):
        # Normalization happens at call time; exactly one candidate is returned.
        return [resolver.semanticscholar.normalize(raw_paper)]

    # Stubs are set on this resolver instance only.
    for method_name in (
        "search_crossref_best_match",
        "search_datacite_best_match",
        "search_openalex_best_match",
        "search_pubmed_best_match",
        "search_europepmc_best_match",
    ):
        setattr(resolver, method_name, _no_hit)
    setattr(resolver.semanticscholar, "search", _canned_search)

    seed_entry = BibEntry(
        entry_type="article",
        citation_key="seed2024",
        fields={"title": "Physics Example", "author": "Jane Doe", "year": "2024"},
    )
    result = resolver.resolve_entry(seed_entry)

    assert result is not None
    assert result.source_label == "semanticscholar:search:Physics Example"
|