# CiteGeist/tests/test_sources_plugin.py
#
# 172 lines
# 4.8 KiB
# Python

"""Tests for the source plugin architecture."""
from __future__ import annotations
import pytest
from citegeist.sources import BibliographicSource, SourceRegistry, CrossRefSource
class MockSource(BibliographicSource):
    """In-memory stand-in for a bibliographic source.

    Each lookup is appended to ``lookup_calls`` as a ``(kind, value)`` tuple
    and answers "not found" (``None``); searching yields an empty list and
    normalizing yields ``None``.
    """

    def __init__(self, config: dict | None = None):
        """Forward *config* to the base class and start with an empty call log."""
        super().__init__(config)
        # Chronological log of lookups: ('doi', <doi>) or ('title', <title>).
        self.lookup_calls: list[tuple[str, str]] = []

    def lookup_by_doi(self, doi: str) -> None:
        """Log the DOI lookup and return None to indicate not found."""
        call = ('doi', doi)
        self.lookup_calls.append(call)
        return None

    def lookup_by_title(self, title: str) -> None:
        """Log the title lookup and return None to indicate not found."""
        call = ('title', title)
        self.lookup_calls.append(call)
        return None

    def search(self, query: str, limit: int = 10) -> list:
        """Return an empty result list; *query* and *limit* are not used."""
        return []

    def normalize(self, record: dict) -> None:
        """Return None; the mock does not convert *record* into anything."""
        return None
def test_source_base_interface():
    """Check the behaviour a bare MockSource exposes beyond its own methods."""
    mock = MockSource()

    assert mock.is_available()
    assert mock.get_identifier_scheme() == 'mocksource'

    # MockSource overrides none of the methods called in this test, so these
    # results presumably come from the BibliographicSource base class.
    work_id = 'doi:test'
    assert mock.get_fulltext_url(work_id) is None
    assert mock.get_embedding(work_id) is None
def test_mock_source():
    """Check that MockSource logs each lookup, in call order."""
    mock = MockSource()

    mock.lookup_by_doi('10.1234/test')
    mock.lookup_by_title('Test Title')

    expected_calls = [
        ('doi', '10.1234/test'),
        ('title', 'Test Title'),
    ]
    assert mock.lookup_calls == expected_calls
def test_source_registry():
    """Register an enabled source, then list it and fetch an instance of it."""
    reg = SourceRegistry()
    reg.register(MockSource, name='mock_source', config={'enabled': True})

    # The registered name must show up in the listing.
    assert 'mock_source' in reg.list_sources()

    # Fetching by name must hand back a usable MockSource instance.
    instance = reg.get('mock_source')
    assert instance is not None
    assert isinstance(instance, MockSource)
    assert instance.is_available()
def test_source_registry_disabled():
    """Check that a disabled source is listed but cannot be fetched."""
    reg = SourceRegistry()
    disabled_config = {'enabled': False}
    reg.register(MockSource, name='disabled_source', config=disabled_config)

    # Disabled sources still appear in the listing...
    assert 'disabled_source' in reg.list_sources()

    # ...but asking for an instance yields None.
    assert reg.get('disabled_source') is None
def test_crossref_source():
    """Fetch CrossRefSource via the registry and normalize a wrapped record."""
    reg = SourceRegistry()
    reg.register(CrossRefSource, name='crossref', config={})

    crossref = reg.get('crossref')
    assert crossref is not None
    assert crossref.is_available()
    assert crossref.get_identifier_scheme() == 'doi'

    # The work record sits under a top-level 'message' key.
    work = {
        'DOI': '10.1234/example',
        'title': ['Test Title'],
        'author': [{'given': 'Jane', 'family': 'Doe'}],
        'published-print': {'date-parts': [[2024]]},
        'container-title': ['Journal of Tests'],
        'publisher': 'Test Publisher',
        'URL': 'https://doi.org/10.1234/example',
        'abstract': '<jats:p>Example abstract</jats:p>',
    }
    entry = crossref.normalize({'message': work})
    assert entry is not None

    # Field values expected on the normalized entry.
    expected_fields = {
        'doi': '10.1234/example',
        'title': 'Test Title',
        'year': '2024',
        'journal': 'Journal of Tests',
    }
    for field_name, expected_value in expected_fields.items():
        assert entry.fields[field_name] == expected_value
def test_crossref_search_item_normalization():
    """Normalize a bare record (no 'message' wrapper) dated via 'issued'."""
    crossref = CrossRefSource()
    search_item = {
        'DOI': '10.1234/example',
        'title': ['Search Result'],
        'author': [{'family': 'Doe'}],
        'issued': {'date-parts': [[2023]]},
    }

    entry = crossref.normalize(search_item)

    assert entry is not None
    assert entry.fields['doi'] == '10.1234/example'
    assert entry.fields['year'] == '2023'
def test_source_record():
    """Check that SourceRecord keeps the values passed to its constructor."""
    from citegeist.sources import SourceRecord

    payload = {'test': 'data'}
    rec = SourceRecord(
        raw=payload,
        source_type='test',
        source_label='test_source',
        timestamp='2024-01-01',
        confidence=1.0,
    )

    assert rec.source_type == 'test'
    assert rec.source_label == 'test_source'
    assert rec.confidence == 1.0
    assert rec.raw == {'test': 'data'}
def test_citation_edge():
    """Check that CitationEdge keeps its relation type and confidence."""
    from citegeist.sources import CitationEdge

    citation = CitationEdge(
        source_work_id='doi:10.1234',
        target_work_id='doi:10.5678',
        relation_type='cites',
        source_type='crossref',
        source_label='crossref:test',
        confidence=0.9,
    )

    assert citation.relation_type == 'cites'
    # Exact comparison is intended: the same float literal is stored and read.
    assert citation.confidence == 0.9