diff --git a/src/citegeist/resolve.py b/src/citegeist/resolve.py
index 5e5a205..b17a774 100644
--- a/src/citegeist/resolve.py
+++ b/src/citegeist/resolve.py
@@ -362,8 +362,20 @@ def _node_text(node: ET.Element | None) -> str:
 
 
 def _make_resolution_key(author_text: str, year: str, title: str) -> str:
-    first_author = author_text.split(" and ")[0]
-    family_name = first_author.split(",")[0] if "," in first_author else first_author.split()[-1]
+    """Return a key built from the first author's family name, year, and first title word.
+
+    Falls back to "ref" when the family name has no alphanumeric characters
+    and to "untitled" when the title contains no words.
+    """
+    # Collapse whitespace runs so a missing or blank author becomes "" and the
+    # remaining tokens are separated by exactly one space.
+    normalized_author_text = " ".join((author_text or "").split())
+    first_author = normalized_author_text.split(" and ")[0].strip()
+    if "," in first_author:
+        family_name = first_author.split(",")[0].strip()
+    else:
+        # rpartition yields "" for an empty author instead of raising IndexError.
+        family_name = first_author.rpartition(" ")[2]
     family_name = "".join(ch for ch in family_name.lower() if ch.isalnum()) or "ref"
     first_word = "".join(ch for ch in title.split()[0].lower() if ch.isalnum()) if title.split() else "untitled"
     return f"{family_name}{year}{first_word}"
diff --git a/tests/test_resolve.py b/tests/test_resolve.py
index 226ee10..69f2a7c 100644
--- a/tests/test_resolve.py
+++ b/tests/test_resolve.py
@@ -31,6 +31,22 @@ def test_crossref_message_to_entry_maps_basic_fields():
     assert entry.fields["year"] == "2024"
 
 
+def test_crossref_message_to_entry_handles_missing_author_without_crashing():
+    entry = _crossref_message_to_entry(
+        {
+            "type": "journal-article",
+            "title": ["Avida and digital evolution"],
+            "container-title": ["Artificial Life"],
+            "issued": {"date-parts": [[2003, 1, 1]]},
+            "author": [{"family": "", "given": ""}],
+        }
+    )
+
+    assert entry.citation_key == "crossref2003avida"
+    assert entry.fields["title"] == "Avida and digital evolution"
+    assert entry.fields["year"] == "2003"
+
+
 def test_arxiv_atom_entry_to_bib_maps_basic_fields():
     xml = ET.fromstring(
         """