Add PubMed support to CiteGeist

2026-04-07 01:41:53 -04:00 · 2026-04-07 01:41:53 -04:00 · 663fb1973a
parent 7bdaf37c59
commit 663fb1973a
7 changed files with 444 additions and 6 deletions
--- a/README.md
+++ b/README.md
@ -51,7 +51,7 @@ The initial repo includes:
 - staged plaintext reference extraction that now preserves more structured metadata from legacy references, including year suffixes, identifiers, volume/issue/pages, and thesis/report/web-style venue hints;
 - a reference-extraction backend seam with the local `heuristic` parser as the default implementation, so optional external backends can be added later without changing the core extract workflow;
 - standalone verification and disambiguation of free-text references or partial BibTeX into auditable BibTeX/JSON results with `x_status`, `x_confidence`, `x_source`, `x_query`, and alternate-candidate traces;
- identifier-first metadata resolution for DOI, OpenAlex, DBLP, arXiv, and DataCite-backed entries, with OpenAlex/DataCite title-search fallback;
+- identifier-first metadata resolution for DOI, PMID/PubMed, OpenAlex, DBLP, arXiv, and DataCite-backed entries, with OpenAlex/DataCite/PubMed title-search fallback;
 - local citation-graph traversal over stored `cites`, `cited_by`, and `crossref` edges;
 - Crossref- and OpenAlex-backed graph expansion that materializes draft related works and edge provenance;
 - a dedicated source-client layer with fixture/cache support for live-source development;
--- a/src/citegeist/bootstrap.py
+++ b/src/citegeist/bootstrap.py
@ -358,15 +358,12 @@ class Bootstrapper:
        })
        return results
 def _deadline_reached(deadline: float | None) -> bool:
    return deadline is not None and time.monotonic() >= deadline
    def _topic_candidates(self, topic: str, seed_keys: list[str], limit: int) -> list[tuple[BibEntry, float]]:
        scored: dict[str, tuple[BibEntry, float]] = {}
-        for source_name, base_score, entries in (
+        for _source_name, base_score, entries in (
            ("openalex", 3.0, self.resolver.search_openalex(topic, limit=limit)),
            ("pubmed", 2.5, self.resolver.search_pubmed(topic, limit=limit)),
            ("crossref", 2.0, self.resolver.search_crossref(topic, limit=limit)),
            ("datacite", 1.5, self.resolver.search_datacite(topic, limit=limit)),
        ):
@ -383,6 +380,10 @@ def _deadline_reached(deadline: float | None) -> bool:
        return ranked[:limit]
 def _deadline_reached(deadline: float | None) -> bool:
    return deadline is not None and time.monotonic() >= deadline
 def _topic_relevance_score(entry: BibEntry, topic: str) -> float:
    topic_terms = _tokenize(topic)
    title_terms = _tokenize(entry.fields.get("title", ""))
--- a/src/citegeist/resolve.py
+++ b/src/citegeist/resolve.py
@ -36,6 +36,11 @@ class MetadataResolver:
            if resolved is not None:
                return resolved
        if pmid := entry.fields.get("pmid"):
            resolved = self.resolve_pmid(pmid)
            if resolved is not None:
                return resolved
        if openalex_id := entry.fields.get("openalex"):
            resolved = self.resolve_openalex(openalex_id)
            if resolved is not None:
@ -73,6 +78,13 @@ class MetadataResolver:
            )
            if resolved is not None:
                return resolved
            resolved = self.search_pubmed_best_match(
                title=title,
                author_text=entry.fields.get("author", ""),
                year=entry.fields.get("year", ""),
            )
            if resolved is not None:
                return resolved
        return None
@ -166,6 +178,23 @@ class MetadataResolver:
            source_label=f"arxiv:id:{arxiv_id}",
        )
    def resolve_pmid(self, pmid: str) -> Resolution | None:
        """Resolve a single PubMed record by PMID through the efetch endpoint.

        Returns ``None`` when the PMID contains no digits, the XML request
        produces no document, or the document holds no article carrying
        that PMID.
        """
        clean_pmid = _normalize_pmid(pmid)
        if not clean_pmid:
            return None
        params = {"db": "pubmed", "id": clean_pmid, "retmode": "xml"}
        query = urllib.parse.urlencode(params)
        document = self._safe_get_xml(f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?{query}")
        # Only look inside the document when one was actually returned.
        match = None if document is None else _find_pubmed_article(document, clean_pmid)
        if match is None:
            return None
        resolved_entry = _pubmed_article_to_entry(match, fallback_pmid=clean_pmid)
        return Resolution(
            entry=resolved_entry,
            source_type="resolver",
            source_label=f"pubmed:pmid:{clean_pmid}",
        )
    def resolve_openalex(self, openalex_id: str) -> Resolution | None:
        normalized_id = _normalize_openalex_id(openalex_id)
        payload = self._safe_get_json(f"https://api.openalex.org/works/{normalized_id}")
@ -227,6 +256,30 @@ class MetadataResolver:
            return []
        return [_openalex_work_to_entry(item) for item in payload.get("results", [])]
    def search_pubmed(self, title: str, limit: int = 5) -> list[BibEntry]:
        """Search PubMed for ``title`` and return up to ``limit`` entries.

        The whitespace-collapsed title is sent to the E-utilities esearch
        endpoint; the PMIDs it returns are expanded into entries by
        ``_fetch_pubmed_entries``.

        Returns an empty list when the title is blank, ``limit`` is not
        positive, the search request yields no payload, or no usable PMIDs
        come back.
        """
        query_text = " ".join(title.split())
        # A non-positive limit could never produce results (the id list was
        # always sliced down to nothing), so skip the network round trip.
        if not query_text or limit <= 0:
            return []
        query = urllib.parse.urlencode(
            {
                "db": "pubmed",
                "retmode": "json",
                "retmax": limit,
                "term": query_text,
            }
        )
        payload = self._safe_get_json(f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?{query}")
        if payload is None:
            return []
        # Keep only ids that still contain digits after normalization.
        ids = [
            normalized
            for value in payload.get("esearchresult", {}).get("idlist", [])
            if (normalized := _normalize_pmid(str(value)))
        ]
        if not ids:
            return []
        return self._fetch_pubmed_entries(ids[:limit])
    def _safe_get_json(self, url: str) -> dict | None:
        try:
            return self.source_client.get_json(url)
@ -265,6 +318,51 @@ class MetadataResolver:
            source_label=f"openalex:search:{title}",
        )
    def search_pubmed_best_match(
        self,
        title: str,
        author_text: str = "",
        year: str = "",
    ) -> Resolution | None:
        """Run a PubMed title search and wrap the best-matching hit.

        Up to five search results are handed to ``_select_best_title_match``
        together with the title, author text and year. Returns ``None``
        when that helper selects nothing.
        """
        hits = self.search_pubmed(title, limit=5)
        best = _select_best_title_match(
            hits,
            title=title,
            author_text=author_text,
            year=year,
        )
        if best is not None:
            return Resolution(
                entry=best,
                source_type="resolver",
                source_label=f"pubmed:search:{title}",
            )
        return None
    def _fetch_pubmed_entries(self, pmids: list[str]) -> list[BibEntry]:
        ordered_pmids = [pmid for pmid in dict.fromkeys(pmids) if pmid]
        if not ordered_pmids:
            return []
        id_param = ",".join(ordered_pmids)
        summary_query = urllib.parse.urlencode({"db": "pubmed", "retmode": "json", "id": id_param})
        summaries_payload = self._safe_get_json(
            f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?{summary_query}"
        ) or {}
        summaries = summaries_payload.get("result", {})
        fetch_query = urllib.parse.urlencode({"db": "pubmed", "id": id_param, "retmode": "xml"})
        root = self._safe_get_xml(f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?{fetch_query}")
        articles = _pubmed_articles_by_pmid(root)
        entries: list[BibEntry] = []
        for pmid in ordered_pmids:
            summary = summaries.get(pmid)
            article = articles.get(pmid)
            if not summary and article is None:
                continue
            entries.append(_pubmed_record_to_entry(summary or {}, article, fallback_pmid=pmid))
        return entries
 def merge_entries(base: BibEntry, resolved: BibEntry) -> BibEntry:
    """Merge ``resolved`` into ``base`` and return only the merged entry.

    Thin wrapper around ``merge_entries_with_conflicts`` that discards the
    second element of its two-part result.
    """
    combined, _discarded = merge_entries_with_conflicts(base, resolved)
    return combined
@ -651,6 +749,214 @@ def _candidate_matches_author_tokens(candidate: BibEntry, author_tokens: set[str
    return bool(author_tokens & candidate_tokens)
 def _normalize_pmid(value: str) -> str:
    return "".join(ch for ch in str(value) if ch.isdigit())
 def _pubmed_articles_by_pmid(root: ET.Element | None) -> dict[str, ET.Element]:
    if root is None:
        return {}
    articles: dict[str, ET.Element] = {}
    for article in root.findall(".//PubmedArticle"):
        pmid = _normalize_pmid(_node_text(article.find("./MedlineCitation/PMID")))
        if pmid:
            articles[pmid] = article
    return articles
 def _find_pubmed_article(root: ET.Element, pmid: str) -> ET.Element | None:
    """Return the ``PubmedArticle`` under ``root`` whose PMID matches ``pmid``, or ``None``."""
    by_pmid = _pubmed_articles_by_pmid(root)
    wanted = _normalize_pmid(pmid)
    return by_pmid.get(wanted)
 def _pubmed_record_to_entry(summary: dict, article: ET.Element | None, fallback_pmid: str) -> BibEntry:
    """Combine a summary record and an optional XML article into one entry.

    With an article available, the entry is built from the XML and the
    summary only fills fields the XML left empty. Without one, the entry
    is built from the summary fields alone.
    """
    if article is None:
        fields = _pubmed_summary_fields(summary, fallback_pmid)
        key_parts = [fields.get(name, "") for name in ("doi", "pmid", "author", "year", "title")]
        return BibEntry(
            entry_type="article",
            citation_key=_pubmed_citation_key(*key_parts),
            fields=fields,
        )
    entry = _pubmed_article_to_entry(article, fallback_pmid=fallback_pmid)
    _merge_pubmed_summary_into_fields(entry.fields, summary, fallback_pmid)
    return entry
 def _pubmed_article_to_entry(article: ET.Element, fallback_pmid: str = "") -> BibEntry:
    """Convert one ``PubmedArticle`` element into an ``article`` entry.

    Extracts title, authors, year, journal, abstract, DOI, PMID and PMC id;
    only non-empty values become fields. The URL points at PMC when a PMC id
    is present, otherwise at the PubMed page for the PMID.
    ``fallback_pmid`` is used when the XML provides no PMID.
    """
    medline = article.find("./MedlineCitation")
    if medline is None:
        article_node = None
        raw_pmid = fallback_pmid
    else:
        article_node = medline.find("./Article")
        raw_pmid = _node_text(medline.find("./PMID"))
    pmid = _normalize_pmid(raw_pmid) or _normalize_pmid(fallback_pmid)
    raw_title = _element_text(article_node.find("./ArticleTitle")) if article_node is not None else ""
    title = _normalize_text(raw_title)
    author_names = (_pubmed_author_name(node) for node in article.findall(".//AuthorList/Author"))
    authors = " and ".join(name for name in author_names if name)
    journal = _normalize_text(_node_text(article.find(".//Journal/Title")))
    year = _pubmed_article_year(article)
    abstract = _pubmed_abstract_text(article)
    doi = _pubmed_article_identifier(article, "doi")
    pmcid = _pubmed_article_identifier(article.find("./PubmedData"), "pmc")
    # Insertion order below fixes the field order of the resulting entry.
    extracted = (
        ("title", title),
        ("author", authors),
        ("year", year),
        ("journal", journal),
        ("abstract", abstract),
        ("doi", doi),
        ("pmid", pmid),
        ("pmcid", pmcid),
    )
    fields: dict[str, str] = {name: value for name, value in extracted if value}
    if pmcid:
        fields["url"] = f"https://pmc.ncbi.nlm.nih.gov/articles/{pmcid}/"
    elif pmid:
        fields["url"] = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"
    return BibEntry(
        entry_type="article",
        citation_key=_pubmed_citation_key(doi, pmid, authors, year, title),
        fields=fields,
    )
 def _merge_pubmed_summary_into_fields(fields: dict[str, str], summary: dict, fallback_pmid: str) -> None:
    """Fill gaps in ``fields`` (in place) with values derived from ``summary``.

    A summary value is copied only when it is non-empty and ``fields`` has
    no truthy value under the same key.
    """
    summary_fields = _pubmed_summary_fields(summary, fallback_pmid)
    for name, candidate in summary_fields.items():
        if not candidate or fields.get(name):
            continue
        fields[name] = candidate
 def _pubmed_summary_fields(summary: dict, fallback_pmid: str) -> dict[str, str]:
    """Convert one PubMed esummary record into entry fields.

    Reads ``uid``, ``title``, ``pubdate``, ``fulljournalname``, ``authors``
    and ``articleids`` from ``summary``; only non-empty values become
    fields. ``fallback_pmid`` is used when the record has no ``uid``. The
    URL points at PMC when a PMC id is known, otherwise at the PubMed page.
    """
    pmid = _normalize_pmid(str(summary.get("uid") or fallback_pmid))
    title = _normalize_text(str(summary.get("title") or ""))
    year = _pubmed_year_from_text(str(summary.get("pubdate") or ""))
    journal = _normalize_text(str(summary.get("fulljournalname") or ""))
    authors = " and ".join(
        name
        for name in (
            _normalize_person_display_name(str(author.get("name") or ""))
            # ``or []`` tolerates an explicit null, matching the articleids loop.
            for author in summary.get("authors") or []
        )
        if name
    )
    doi = ""
    pmcid = ""
    for article_id in summary.get("articleids") or []:
        id_type = str(article_id.get("idtype") or "").lower()
        value = str(article_id.get("value") or "")
        if id_type == "doi" and value:
            doi = value
        elif id_type in {"pmc", "pmcid"}:
            # esummary lists the PMC id twice: as "pmc" ("PMC123456") and as
            # "pmcid" ("pmc-id: PMC123456;"). Extract the bare accession so
            # the decorated form cannot overwrite it and break the URL.
            accession = re.search(r"PMC\d+", value, re.IGNORECASE)
            if accession:
                pmcid = accession.group(0).upper()
    fields: dict[str, str] = {}
    if title:
        fields["title"] = title
    if authors:
        fields["author"] = authors
    if year:
        fields["year"] = year
    if journal:
        fields["journal"] = journal
    if doi:
        fields["doi"] = doi
    if pmid:
        fields["pmid"] = pmid
    if pmcid:
        fields["pmcid"] = pmcid
        fields["url"] = f"https://pmc.ncbi.nlm.nih.gov/articles/{pmcid}/"
    elif pmid:
        fields["url"] = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"
    return fields
 def _pubmed_author_name(author: ET.Element) -> str:
    """Format one ``Author`` element as a display name.

    A ``CollectiveName`` is returned as-is. Otherwise the result is
    ``"Last, Fore"``, or ``"Last, I. N."`` built from ``Initials`` when no
    forename exists, or whichever of last name / forename is present.
    """
    collective_name = _normalize_text(_node_text(author.find("./CollectiveName")))
    if collective_name:
        return collective_name
    last_name, fore_name, raw_initials = (
        _normalize_text(_node_text(author.find(path)))
        for path in ("./LastName", "./ForeName", "./Initials")
    )
    if not last_name:
        return fore_name
    if fore_name:
        return f"{last_name}, {fore_name}"
    if raw_initials:
        # Keep only letters and render each as a dotted initial.
        dotted = " ".join(f"{letter}." for letter in re.findall(r"[A-Za-z]", raw_initials))
        if dotted:
            return f"{last_name}, {dotted}"
    return last_name
 def _pubmed_article_year(article: ET.Element) -> str:
    """Return the publication year for ``article``, or ``""`` if none is found.

    Explicit ``Year`` elements are tried first (journal issue date, then
    article date, then any ``PubDate``); free-text ``MedlineDate`` values
    are parsed for a year only as a fallback.
    """
    explicit_years = (
        _node_text(article.find(path))
        for path in (
            ".//JournalIssue/PubDate/Year",
            ".//ArticleDate/Year",
            ".//PubDate/Year",
        )
    )
    found = next((text for text in explicit_years if text), "")
    if found:
        return found
    parsed_years = (
        _pubmed_year_from_text(_node_text(article.find(path)))
        for path in (
            ".//JournalIssue/PubDate/MedlineDate",
            ".//PubDate/MedlineDate",
        )
    )
    return next((text for text in parsed_years if text), "")
 def _pubmed_year_from_text(value: str) -> str:
    match = re.search(r"\b(1[6-9]\d{2}|20\d{2}|21\d{2})\b", value)
    return match.group(1) if match else ""
 def _pubmed_abstract_text(article: ET.Element) -> str:
    """Join all ``Abstract/AbstractText`` sections of ``article`` into one string.

    Empty sections are skipped; a section with a ``Label`` attribute is
    rendered as ``"Label: text"``. Sections are separated by single spaces.
    """
    segments: list[str] = []
    for section in article.findall(".//Abstract/AbstractText"):
        body = _normalize_text(_element_text(section))
        if body:
            heading = _normalize_text(section.attrib.get("Label", ""))
            segments.append(f"{heading}: {body}" if heading else body)
    return " ".join(segments)
 def _pubmed_article_identifier(root: ET.Element | None, identifier_type: str) -> str:
    """Return the article's own identifier of ``identifier_type``, or ``""``.

    Scans ``ArticleId`` elements below ``root`` for a matching ``IdType``
    (case-insensitive). For ``"doi"`` it falls back to an ``ELocationID``
    with ``EIdType="doi"``. A ``None`` root yields ``""``.

    ``ArticleId`` elements nested under a ``ReferenceList`` are ignored:
    they identify the works this article cites, so using them would assign
    a reference's DOI or PMC id to an article that has none of its own.
    """
    if root is None:
        return ""
    normalized_type = identifier_type.lower()
    def own_article_ids(node: ET.Element):
        # Document-order walk that never descends into cited-work lists.
        for child in node:
            if child.tag == "ReferenceList":
                continue
            if child.tag == "ArticleId":
                yield child
            yield from own_article_ids(child)
    for node in own_article_ids(root):
        if str(node.attrib.get("IdType") or "").lower() == normalized_type:
            return _normalize_text(_element_text(node))
    if normalized_type == "doi":
        for node in root.findall(".//ELocationID"):
            if str(node.attrib.get("EIdType") or "").lower() == "doi":
                return _normalize_text(_element_text(node))
    return ""
 def _pubmed_citation_key(doi: str, pmid: str, authors: str, year: str, title: str) -> str:
    if doi:
        suffix = re.sub(r"[^A-Za-z0-9]+", "", doi).lower()
        return f"doi{suffix}"
    if pmid:
        return f"pmid{pmid}"
    return _make_resolution_key(authors or "pubmed", year or "n.d.", title or "untitled")
 def _element_text(node: ET.Element | None) -> str:
    if node is None:
        return ""
    return " ".join("".join(node.itertext()).split())
 def _datacite_work_to_entry(data: dict) -> BibEntry:
    attributes = data.get("attributes", {})
    doi = str(attributes.get("doi") or "")
--- a/src/citegeist/verify.py
+++ b/src/citegeist/verify.py
@ -149,6 +149,20 @@ class BibliographyVerifier:
                    input_type=input_type,
                    input_key=input_key,
                )
        if source_entry is not None and source_entry.fields.get("pmid"):
            direct = self.resolver.resolve_pmid(source_entry.fields["pmid"])
            if direct is not None:
                return VerificationResult(
                    query=query,
                    context=context,
                    status="exact",
                    confidence=1.0,
                    entry=direct.entry,
                    source_label=direct.source_label,
                    alternates=[],
                    input_type=input_type,
                    input_key=input_key,
                )
        candidate_limit = max(1, limit)
        candidates = self._collect_candidates(
@ -209,6 +223,7 @@ class BibliographyVerifier:
            ("crossref", self.resolver.search_crossref(search_title, limit=limit)),
            ("openalex", self.resolver.search_openalex(search_title, limit=limit)),
            ("datacite", self.resolver.search_datacite(search_title, limit=limit)),
            ("pubmed", self.resolver.search_pubmed(search_title, limit=limit)),
        ):
            for entry in source_entries:
                signature = _candidate_signature(entry)
--- a/tests/test_bootstrap.py
+++ b/tests/test_bootstrap.py
@ -34,6 +34,7 @@ def test_bootstrap_from_topic_only():
    try:
        bootstrapper = Bootstrapper()
        bootstrapper.resolver.search_openalex = lambda topic, limit=5: []  # type: ignore[method-assign]
        bootstrapper.resolver.search_pubmed = lambda topic, limit=5: []  # type: ignore[method-assign]
        bootstrapper.resolver.search_crossref = lambda topic, limit=5: []  # type: ignore[method-assign]
        bootstrapper.resolver.search_datacite = lambda topic, limit=5: [  # type: ignore[method-assign]
            __import__("citegeist").BibEntry(
@ -139,6 +140,7 @@ def test_bootstrap_ranks_and_deduplicates_topic_candidates():
                fields={"title": "Graph Topic Ranking", "abstract": "graph topic graph"},
            )
        ]
        bootstrapper.resolver.search_pubmed = lambda topic, limit=5: []  # type: ignore[method-assign]
        bootstrapper.resolver.search_crossref = lambda topic, limit=5: [  # type: ignore[method-assign]
            BibEntry(
                entry_type="article",
@ -172,6 +174,7 @@ def test_bootstrap_preview_does_not_write_to_database():
        bootstrapper.resolver.search_openalex = lambda topic, limit=5: [  # type: ignore[method-assign]
            BibEntry(entry_type="article", citation_key="preview2024graph", fields={"title": "Preview Graph Topic"})
        ]
        bootstrapper.resolver.search_pubmed = lambda topic, limit=5: []  # type: ignore[method-assign]
        bootstrapper.resolver.search_crossref = lambda topic, limit=5: []  # type: ignore[method-assign]
        bootstrapper.resolver.search_datacite = lambda topic, limit=5: []  # type: ignore[method-assign]
@ -194,6 +197,7 @@ def test_bootstrap_topic_commit_limit_restricts_persisted_candidates():
            BibEntry(entry_type="article", citation_key="rank1", fields={"title": "Graph Topic One"}),
            BibEntry(entry_type="article", citation_key="rank2", fields={"title": "Graph Topic Two"}),
        ]
        bootstrapper.resolver.search_pubmed = lambda topic, limit=5: []  # type: ignore[method-assign]
        bootstrapper.resolver.search_crossref = lambda topic, limit=5: []  # type: ignore[method-assign]
        bootstrapper.resolver.search_datacite = lambda topic, limit=5: []  # type: ignore[method-assign]
        bootstrapper.crossref_expander.expand_entry_references = lambda _store, _key: []  # type: ignore[method-assign]
@ -227,6 +231,7 @@ def test_bootstrap_topic_candidates_are_attached_to_topic():
                fields={"title": "Graph Topic Result", "year": "2024"},
            )
        ]
        bootstrapper.resolver.search_pubmed = lambda topic, limit=5: []  # type: ignore[method-assign]
        bootstrapper.resolver.search_crossref = lambda topic, limit=5: []  # type: ignore[method-assign]
        bootstrapper.resolver.search_datacite = lambda topic, limit=5: []  # type: ignore[method-assign]
        bootstrapper.crossref_expander.expand_entry_references = lambda _store, _key: []  # type: ignore[method-assign]
@ -278,6 +283,7 @@ def test_bootstrap_topic_commit_requires_title_anchor():
                },
            ),
        ]
        bootstrapper.resolver.search_pubmed = lambda topic, limit=5: []  # type: ignore[method-assign]
        bootstrapper.resolver.search_crossref = lambda topic, limit=5: []  # type: ignore[method-assign]
        bootstrapper.resolver.search_datacite = lambda topic, limit=5: []  # type: ignore[method-assign]
        bootstrapper.crossref_expander.expand_entry_references = lambda _store, _key: []  # type: ignore[method-assign]
@ -482,6 +488,7 @@ def test_bootstrap_preview_uses_topic_commit_limit_when_larger_than_topic_limit(
            )
            for index in range(1, 8)
        ][:limit]
        bootstrapper.resolver.search_pubmed = lambda topic, limit=5: []  # type: ignore[method-assign]
        bootstrapper.resolver.search_crossref = lambda topic, limit=5: []  # type: ignore[method-assign]
        bootstrapper.resolver.search_datacite = lambda topic, limit=5: []  # type: ignore[method-assign]
--- a/tests/test_resolve.py
+++ b/tests/test_resolve.py
@ -8,6 +8,7 @@ from citegeist.resolve import (
    _crossref_message_to_entry,
    _datacite_work_to_entry,
    _openalex_work_to_entry,
    _pubmed_article_to_entry,
    merge_entries_with_conflicts,
    merge_entries,
 )
@ -88,6 +89,52 @@ def test_arxiv_atom_entry_to_bib_maps_basic_fields():
    assert entry.fields["doi"] == "10.1000/arxiv-example"
 def test_pubmed_article_to_entry_maps_basic_fields():
    """Check that a minimal PubmedArticle document maps onto the expected entry fields."""
    # Fixture: the DOI appears only as an ELocationID, the PMC id only under PubmedData.
    xml = ET.fromstring(
        """
 <PubmedArticle>
  <MedlineCitation>
    <PMID>12345678</PMID>
    <Article>
      <ArticleTitle>PubMed Resolved Work</ArticleTitle>
      <Abstract>
        <AbstractText Label="Background">Evidence summary.</AbstractText>
        <AbstractText>Second paragraph.</AbstractText>
      </Abstract>
      <Journal>
        <JournalIssue>
          <PubDate><Year>2021</Year></PubDate>
        </JournalIssue>
        <Title>Journal of Evidence</Title>
      </Journal>
      <AuthorList>
        <Author><LastName>Smith</LastName><ForeName>Jane</ForeName></Author>
      </AuthorList>
      <ELocationID EIdType="doi">10.1000/pubmed-example</ELocationID>
    </Article>
  </MedlineCitation>
  <PubmedData>
    <ArticleIdList>
      <ArticleId IdType="pubmed">12345678</ArticleId>
      <ArticleId IdType="pmc">PMC123456</ArticleId>
    </ArticleIdList>
  </PubmedData>
 </PubmedArticle>
 """
    )
    entry = _pubmed_article_to_entry(xml)
    # The citation key is derived from the DOI because one is present.
    assert entry.citation_key == "doi101000pubmedexample"
    assert entry.fields["title"] == "PubMed Resolved Work"
    assert entry.fields["author"] == "Smith, Jane"
    assert entry.fields["journal"] == "Journal of Evidence"
    assert entry.fields["year"] == "2021"
    assert entry.fields["pmid"] == "12345678"
    assert entry.fields["pmcid"] == "PMC123456"
    # The labelled section is prefixed with its label; sections are space-joined.
    assert entry.fields["abstract"] == "Background: Evidence summary. Second paragraph."
 def test_merge_entries_prefers_existing_values_and_adds_missing_fields():
    base = BibEntry(
        entry_type="article",
@ -209,6 +256,35 @@ def test_resolver_tries_doi_before_dblp():
    ]
 def test_resolver_tries_pmid_before_dblp():
    """An entry with both ``pmid`` and ``dblp`` fields must be looked up by PMID first."""
    resolver = MetadataResolver()
    observed: list[tuple[str, str]] = []
    def make_recorder(label: str):
        # Each stub logs its call and reports "not resolved" so the chain continues.
        def recorder(value: str):
            observed.append((label, value))
            return None
        return recorder
    resolver.resolve_pmid = make_recorder("pmid")  # type: ignore[method-assign]
    resolver.resolve_dblp = make_recorder("dblp")  # type: ignore[method-assign]
    seed = BibEntry(
        entry_type="article",
        citation_key="smith2024graphs",
        fields={"pmid": "12345678", "dblp": "conf/test/Smith24"},
    )
    resolver.resolve_entry(seed)
    assert observed == [
        ("pmid", "12345678"),
        ("dblp", "conf/test/Smith24"),
    ]
 def test_openalex_work_to_entry_maps_basic_fields():
    entry = _openalex_work_to_entry(
        {
--- a/tests/test_verify.py
+++ b/tests/test_verify.py
@ -36,6 +36,37 @@ def test_verifier_uses_direct_doi_resolution_for_bib_entries():
    assert result.source_label == "crossref:doi:10.1000/example"
 def test_verifier_uses_direct_pmid_resolution_for_bib_entries():
    """A bib entry carrying a ``pmid`` is verified as exact via direct PMID resolution."""
    def canned_resolution(value):
        # Stand-in for the resolver: echoes the requested PMID back in the result.
        resolved_entry = BibEntry(
            entry_type="article",
            citation_key="pmid12345678",
            fields={
                "author": "Smith, Jane",
                "title": "Resolved PubMed Work",
                "year": "2024",
                "pmid": value,
            },
        )
        return Resolution(
            entry=resolved_entry,
            source_type="resolver",
            source_label=f"pubmed:pmid:{value}",
        )
    verifier = BibliographyVerifier()
    verifier.resolver.resolve_pmid = canned_resolution  # type: ignore[method-assign]
    seed = BibEntry(
        entry_type="misc",
        citation_key="seed2024",
        fields={"title": "Rough Work", "pmid": "12345678"},
    )
    result = verifier.verify_bib_entry(seed)
    assert result.status == "exact"
    assert result.confidence == 1.0
    assert result.entry.fields["title"] == "Resolved PubMed Work"
    assert result.source_label == "pubmed:pmid:12345678"
 def test_verifier_scores_and_sorts_search_candidates():
    verifier = BibliographyVerifier()
    verifier.resolver.search_crossref = lambda title, limit=5: [  # type: ignore[method-assign]
@ -61,6 +92,7 @@ def test_verifier_scores_and_sorts_search_candidates():
    ]
    verifier.resolver.search_openalex = lambda title, limit=5: []  # type: ignore[method-assign]
    verifier.resolver.search_datacite = lambda title, limit=5: []  # type: ignore[method-assign]
    verifier.resolver.search_pubmed = lambda title, limit=5: []  # type: ignore[method-assign]
    result = verifier.verify_string('"Graph-first bibliography augmentation" Smith 2024')
@ -74,6 +106,7 @@ def test_verification_result_to_bib_entry_contains_audit_fields():
    verifier.resolver.search_crossref = lambda title, limit=5: []  # type: ignore[method-assign]
    verifier.resolver.search_openalex = lambda title, limit=5: []  # type: ignore[method-assign]
    verifier.resolver.search_datacite = lambda title, limit=5: []  # type: ignore[method-assign]
    verifier.resolver.search_pubmed = lambda title, limit=5: []  # type: ignore[method-assign]
    result = verifier._verify_query(  # type: ignore[attr-defined]
        {"title": "Missing Work", "authors": [], "year": "", "venue": ""},