CiteGeist/tests/test_europepmc.py

124 lines
4.3 KiB
Python

from __future__ import annotations
from citegeist.resolve import MetadataResolver
from citegeist.sources import EuropePmcSource, SourceRegistry, list_source_catalog
def test_europepmc_source_normalizes_core_record() -> None:
    """Normalize a fully populated Europe PMC record and check the core fields.

    The DOI, PMID, PMCID, journal title, full-text URL and open-access flag
    of the raw payload must all appear in the normalized entry's ``fields``.
    """
    # Raw payload handed to ``EuropePmcSource.normalize``.
    raw_record = {
        "id": "37158217",
        "source": "MED",
        "pmid": "37158217",
        "pmcid": "PMC10000001",
        "doi": "10.1000/example",
        "title": "Biomedical Example",
        "authorString": "Doe J, Roe A",
        "journalTitle": "Biomed Journal",
        "pubYear": "2024",
        "journalVolume": "16",
        "issue": "1",
        "pageInfo": "10-20",
        "abstractText": "Abstract text.",
        "isOpenAccess": "Y",
        "citedByCount": 12,
        "fullTextUrlList": {
            "fullTextUrl": [
                {"url": "https://europepmc.org/articles/PMC10000001?pdf=render"},
            ],
        },
    }
    # Field name -> value the normalized entry is required to carry.
    expected_fields = {
        "doi": "10.1000/example",
        "pmid": "37158217",
        "pmcid": "PMC10000001",
        "journal": "Biomed Journal",
        "url": "https://europepmc.org/articles/PMC10000001?pdf=render",
        "is_oa": "true",
    }

    normalized = EuropePmcSource(config={}).normalize(raw_record)

    assert normalized is not None
    for field_name, expected_value in expected_fields.items():
        assert normalized.fields[field_name] == expected_value
def test_europepmc_registry_and_catalog() -> None:
    """Check registry construction and the catalog entry for Europe PMC.

    A registry configured with a ``europepmc`` source must hand back an
    ``EuropePmcSource`` instance, and the source catalog must list the
    ``europe_pmc`` entry with status ``integrated`` and priority ``now``.
    """
    registry_config = {
        "sources": {
            "europepmc": {
                "source_type": "europepmc",
                "enabled": True,
            },
        },
    }
    registry = SourceRegistry()
    registry.from_config_dict(registry_config)

    assert isinstance(registry.get("europepmc"), EuropePmcSource)

    # Index the catalog by key so the Europe PMC entry can be looked up directly.
    catalog_by_key = {}
    for item in list_source_catalog():
        catalog_by_key[item.key] = item

    europe_pmc_item = catalog_by_key["europe_pmc"]
    assert europe_pmc_item.current_status == "integrated"
    assert europe_pmc_item.priority == "now"
def test_metadata_resolver_uses_europepmc_doi_after_primary_lookups_fail() -> None:
    """Resolve an entry by DOI through Europe PMC when other DOI lookups miss.

    ``resolve_doi`` and ``resolve_datacite_doi`` are stubbed to return
    ``None`` and the Europe PMC DOI lookup is stubbed to return a normalized
    record; ``resolve_entry`` must then report the Europe PMC DOI source
    label and expose the record's PMID.
    """
    resolver = MetadataResolver()

    def _no_doi_match(_doi):
        return None

    def _europepmc_doi_hit(_doi):
        # ``normalize`` runs only when the stub is actually invoked.
        return resolver.europepmc.normalize(
            {
                "id": "37158217",
                "source": "MED",
                "pmid": "37158217",
                "doi": "10.1000/example",
                "title": "Biomedical Example",
                "authorString": "Doe J, Roe A",
                "journalTitle": "Biomed Journal",
                "pubYear": "2024",
            }
        )

    resolver.resolve_doi = _no_doi_match  # type: ignore[method-assign]
    resolver.resolve_datacite_doi = _no_doi_match  # type: ignore[method-assign]
    resolver.europepmc.lookup_by_doi = _europepmc_doi_hit  # type: ignore[method-assign]

    from citegeist.bibtex import BibEntry

    seed_entry = BibEntry(
        entry_type="article",
        citation_key="seed2024",
        fields={"doi": "10.1000/example", "title": "Biomedical Example"},
    )
    outcome = resolver.resolve_entry(seed_entry)

    assert outcome is not None
    assert outcome.source_label == "europepmc:doi:10.1000/example"
    assert outcome.entry.fields["pmid"] == "37158217"
def test_metadata_resolver_uses_europepmc_title_search_after_pubmed() -> None:
    """Resolve an entry by title through Europe PMC when other searches miss.

    The Crossref, DataCite, OpenAlex and PubMed best-match searches are
    stubbed to return ``None`` and the Europe PMC search is stubbed to return
    one normalized record; ``resolve_entry`` must then report the Europe PMC
    search source label.
    """
    resolver = MetadataResolver()

    def _no_match(*args, **kwargs):
        return None

    def _europepmc_search_hit(_title, limit=5):
        # ``normalize`` runs only when the stub is actually invoked.
        record = resolver.europepmc.normalize(
            {
                "id": "37158217",
                "source": "MED",
                "pmid": "37158217",
                "doi": "10.1000/example",
                "title": "Biomedical Example",
                "authorString": "Doe J, Roe A",
                "journalTitle": "Biomed Journal",
                "pubYear": "2024",
            }
        )
        return [record]

    # Every other title search is stubbed to report no match.
    for stubbed_search in (
        "search_crossref_best_match",
        "search_datacite_best_match",
        "search_openalex_best_match",
        "search_pubmed_best_match",
    ):
        setattr(resolver, stubbed_search, _no_match)
    resolver.europepmc.search = _europepmc_search_hit  # type: ignore[method-assign]

    from citegeist.bibtex import BibEntry

    seed_entry = BibEntry(
        entry_type="article",
        citation_key="seed2024",
        fields={"title": "Biomedical Example", "author": "Doe J", "year": "2024"},
    )
    outcome = resolver.resolve_entry(seed_entry)

    assert outcome is not None
    assert outcome.source_label == "europepmc:search:Biomedical Example"