Harden resolution key generation

This commit is contained in:
welsberr 2026-03-20 11:11:04 -04:00
parent 1ae42ec2c4
commit 6b028a5671
2 changed files with 25 additions and 2 deletions

View File

@@ -362,8 +362,15 @@ def _node_text(node: ET.Element | None) -> str:
def _make_resolution_key(author_text: str, year: str, title: str) -> str: def _make_resolution_key(author_text: str, year: str, title: str) -> str:
first_author = author_text.split(" and ")[0] normalized_author_text = " ".join((author_text or "").split())
family_name = first_author.split(",")[0] if "," in first_author else first_author.split()[-1] first_author = normalized_author_text.split(" and ")[0].strip() if normalized_author_text else ""
if "," in first_author:
family_name = first_author.split(",")[0].strip()
elif first_author:
author_tokens = first_author.split()
family_name = author_tokens[-1] if author_tokens else ""
else:
family_name = ""
family_name = "".join(ch for ch in family_name.lower() if ch.isalnum()) or "ref" family_name = "".join(ch for ch in family_name.lower() if ch.isalnum()) or "ref"
first_word = "".join(ch for ch in title.split()[0].lower() if ch.isalnum()) if title.split() else "untitled" first_word = "".join(ch for ch in title.split()[0].lower() if ch.isalnum()) if title.split() else "untitled"
return f"{family_name}{year}{first_word}" return f"{family_name}{year}{first_word}"

View File

@@ -31,6 +31,22 @@ def test_crossref_message_to_entry_maps_basic_fields():
assert entry.fields["year"] == "2024" assert entry.fields["year"] == "2024"
def test_crossref_message_to_entry_handles_missing_author_without_crashing():
    """A Crossref message whose only author has blank names still maps to an entry."""
    # The single author record carries empty family and given names.
    message = {
        "type": "journal-article",
        "title": ["Avida and digital evolution"],
        "container-title": ["Artificial Life"],
        "issued": {"date-parts": [[2003, 1, 1]]},
        "author": [{"family": "", "given": ""}],
    }

    entry = _crossref_message_to_entry(message)

    assert entry.citation_key == "crossref2003avida"
    assert entry.fields["title"] == "Avida and digital evolution"
    assert entry.fields["year"] == "2003"
def test_arxiv_atom_entry_to_bib_maps_basic_fields(): def test_arxiv_atom_entry_to_bib_maps_basic_fields():
xml = ET.fromstring( xml = ET.fromstring(
""" """