Add PubMed support to CiteGeist

This commit is contained in:
welsberr 2026-04-07 01:41:53 -04:00
parent 7bdaf37c59
commit 663fb1973a
7 changed files with 444 additions and 6 deletions

View File

@ -51,7 +51,7 @@ The initial repo includes:
- staged plaintext reference extraction that now preserves more structured metadata from legacy references, including year suffixes, identifiers, volume/issue/pages, and thesis/report/web-style venue hints;
- a reference-extraction backend seam with the local `heuristic` parser as the default implementation, so optional external backends can be added later without changing the core extract workflow;
- standalone verification and disambiguation of free-text references or partial BibTeX into auditable BibTeX/JSON results with `x_status`, `x_confidence`, `x_source`, `x_query`, and alternate-candidate traces;
- identifier-first metadata resolution for DOI, OpenAlex, DBLP, arXiv, and DataCite-backed entries, with OpenAlex/DataCite title-search fallback;
- identifier-first metadata resolution for DOI, PMID/PubMed, OpenAlex, DBLP, arXiv, and DataCite-backed entries, with OpenAlex/DataCite/PubMed title-search fallback;
- local citation-graph traversal over stored `cites`, `cited_by`, and `crossref` edges;
- Crossref- and OpenAlex-backed graph expansion that materializes draft related works and edge provenance;
- a dedicated source-client layer with fixture/cache support for live-source development;

View File

@ -358,15 +358,12 @@ class Bootstrapper:
})
return results
def _deadline_reached(deadline: float | None) -> bool:
    """Return True when *deadline* (a ``time.monotonic()`` timestamp) has passed.

    A ``None`` deadline means "no deadline" and is never considered reached.
    """
    return deadline is not None and time.monotonic() >= deadline
def _topic_candidates(self, topic: str, seed_keys: list[str], limit: int) -> list[tuple[BibEntry, float]]:
scored: dict[str, tuple[BibEntry, float]] = {}
for source_name, base_score, entries in (
for _source_name, base_score, entries in (
("openalex", 3.0, self.resolver.search_openalex(topic, limit=limit)),
("pubmed", 2.5, self.resolver.search_pubmed(topic, limit=limit)),
("crossref", 2.0, self.resolver.search_crossref(topic, limit=limit)),
("datacite", 1.5, self.resolver.search_datacite(topic, limit=limit)),
):
@ -383,6 +380,10 @@ def _deadline_reached(deadline: float | None) -> bool:
return ranked[:limit]
def _deadline_reached(deadline: float | None) -> bool:
return deadline is not None and time.monotonic() >= deadline
def _topic_relevance_score(entry: BibEntry, topic: str) -> float:
topic_terms = _tokenize(topic)
title_terms = _tokenize(entry.fields.get("title", ""))

View File

@ -36,6 +36,11 @@ class MetadataResolver:
if resolved is not None:
return resolved
if pmid := entry.fields.get("pmid"):
resolved = self.resolve_pmid(pmid)
if resolved is not None:
return resolved
if openalex_id := entry.fields.get("openalex"):
resolved = self.resolve_openalex(openalex_id)
if resolved is not None:
@ -73,6 +78,13 @@ class MetadataResolver:
)
if resolved is not None:
return resolved
resolved = self.search_pubmed_best_match(
title=title,
author_text=entry.fields.get("author", ""),
year=entry.fields.get("year", ""),
)
if resolved is not None:
return resolved
return None
@ -166,6 +178,23 @@ class MetadataResolver:
source_label=f"arxiv:id:{arxiv_id}",
)
def resolve_pmid(self, pmid: str) -> Resolution | None:
    """Resolve a PubMed ID to metadata via the NCBI efetch endpoint.

    Returns ``None`` when the PMID normalizes to empty, the request
    fails, or the response contains no article with that PMID.
    """
    pmid_digits = _normalize_pmid(pmid)
    if not pmid_digits:
        return None
    params = urllib.parse.urlencode({"db": "pubmed", "id": pmid_digits, "retmode": "xml"})
    document = self._safe_get_xml(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?" + params
    )
    if document is None:
        return None
    matched = _find_pubmed_article(document, pmid_digits)
    if matched is None:
        return None
    return Resolution(
        entry=_pubmed_article_to_entry(matched, fallback_pmid=pmid_digits),
        source_type="resolver",
        source_label=f"pubmed:pmid:{pmid_digits}",
    )
def resolve_openalex(self, openalex_id: str) -> Resolution | None:
normalized_id = _normalize_openalex_id(openalex_id)
payload = self._safe_get_json(f"https://api.openalex.org/works/{normalized_id}")
@ -227,6 +256,30 @@ class MetadataResolver:
return []
return [_openalex_work_to_entry(item) for item in payload.get("results", [])]
def search_pubmed(self, title: str, limit: int = 5) -> list[BibEntry]:
    """Search PubMed (esearch) for *title* and fetch up to *limit* entries.

    Whitespace in the query is collapsed first; an empty query, a failed
    request, or an empty id list each yield an empty result.
    """
    term = " ".join(title.split())
    if not term:
        return []
    params = urllib.parse.urlencode(
        {
            "db": "pubmed",
            "retmode": "json",
            "retmax": max(1, limit),
            "term": term,
        }
    )
    payload = self._safe_get_json(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?" + params
    )
    if payload is None:
        return []
    pmids: list[str] = []
    for raw_id in payload.get("esearchresult", {}).get("idlist", []):
        digits = _normalize_pmid(str(raw_id))
        if digits:
            pmids.append(digits)
    if not pmids:
        return []
    return self._fetch_pubmed_entries(pmids[:limit])
def _safe_get_json(self, url: str) -> dict | None:
try:
return self.source_client.get_json(url)
@ -265,6 +318,51 @@ class MetadataResolver:
source_label=f"openalex:search:{title}",
)
def search_pubmed_best_match(
    self,
    title: str,
    author_text: str = "",
    year: str = "",
) -> Resolution | None:
    """Pick the best PubMed search hit for a title/author/year triple.

    Candidate selection is delegated to ``_select_best_title_match``;
    ``None`` is returned when it rejects every candidate.
    """
    candidates = self.search_pubmed(title, limit=5)
    best = _select_best_title_match(
        candidates,
        title=title,
        author_text=author_text,
        year=year,
    )
    if best is None:
        return None
    return Resolution(
        entry=best,
        source_type="resolver",
        source_label=f"pubmed:search:{title}",
    )
def _fetch_pubmed_entries(self, pmids: list[str]) -> list[BibEntry]:
    """Fetch BibEntry records for *pmids* by combining esummary and efetch.

    Input order is preserved and duplicates/empties are dropped. A PMID
    that yields neither a summary nor an XML article is skipped.
    """
    unique_pmids: list[str] = []
    seen: set[str] = set()
    for pmid in pmids:
        if pmid and pmid not in seen:
            seen.add(pmid)
            unique_pmids.append(pmid)
    if not unique_pmids:
        return []
    joined_ids = ",".join(unique_pmids)
    summary_params = urllib.parse.urlencode({"db": "pubmed", "retmode": "json", "id": joined_ids})
    summary_payload = self._safe_get_json(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?" + summary_params
    ) or {}
    summary_map = summary_payload.get("result", {})
    fetch_params = urllib.parse.urlencode({"db": "pubmed", "id": joined_ids, "retmode": "xml"})
    document = self._safe_get_xml(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?" + fetch_params
    )
    article_map = _pubmed_articles_by_pmid(document)
    results: list[BibEntry] = []
    for pmid in unique_pmids:
        summary = summary_map.get(pmid)
        article = article_map.get(pmid)
        if summary or article is not None:
            results.append(_pubmed_record_to_entry(summary or {}, article, fallback_pmid=pmid))
    return results
def merge_entries(base: BibEntry, resolved: BibEntry) -> BibEntry:
    """Merge *resolved* metadata into *base* and return only the merged entry.

    Thin wrapper over ``merge_entries_with_conflicts`` for callers that do
    not need the conflict details.
    """
    merged, _ = merge_entries_with_conflicts(base, resolved)
    return merged
@ -651,6 +749,214 @@ def _candidate_matches_author_tokens(candidate: BibEntry, author_tokens: set[str
return bool(author_tokens & candidate_tokens)
def _normalize_pmid(value: str) -> str:
return "".join(ch for ch in str(value) if ch.isdigit())
def _pubmed_articles_by_pmid(root: ET.Element | None) -> dict[str, ET.Element]:
    """Index every ``<PubmedArticle>`` under *root* by its normalized PMID.

    Articles with no readable PMID are omitted; a ``None`` root maps to an
    empty dict. On duplicate PMIDs the last article wins.
    """
    if root is None:
        return {}
    return {
        key: node
        for node in root.findall(".//PubmedArticle")
        if (key := _normalize_pmid(_node_text(node.find("./MedlineCitation/PMID"))))
    }
def _find_pubmed_article(root: ET.Element, pmid: str) -> ET.Element | None:
    """Return the ``<PubmedArticle>`` under *root* whose PMID matches, if any."""
    return _pubmed_articles_by_pmid(root).get(_normalize_pmid(pmid))
def _pubmed_record_to_entry(summary: dict, article: ET.Element | None, fallback_pmid: str) -> BibEntry:
    """Build a BibEntry from an esummary record and/or an efetch article.

    The XML *article* is the primary source when present; summary values
    then only fill fields the article left blank. With no article, the
    entry is built from the summary alone.
    """
    if article is not None:
        entry = _pubmed_article_to_entry(article, fallback_pmid=fallback_pmid)
        _merge_pubmed_summary_into_fields(entry.fields, summary, fallback_pmid)
        return entry
    fields = _pubmed_summary_fields(summary, fallback_pmid)
    citation_key = _pubmed_citation_key(
        fields.get("doi", ""),
        fields.get("pmid", ""),
        fields.get("author", ""),
        fields.get("year", ""),
        fields.get("title", ""),
    )
    return BibEntry(entry_type="article", citation_key=citation_key, fields=fields)
def _pubmed_article_to_entry(article: ET.Element, fallback_pmid: str = "") -> BibEntry:
    """Map an efetch ``<PubmedArticle>`` element onto a BibEntry.

    Only non-empty values are written into the fields dict. The URL
    prefers the PMC article page when a PMCID is present and falls back
    to the PubMed abstract page when only a PMID is known.
    """
    medline = article.find("./MedlineCitation")
    article_node = medline.find("./Article") if medline is not None else None
    pubmed_data = article.find("./PubmedData")
    # Prefer the PMID embedded in the citation; fall back to the caller-supplied one.
    pmid = _normalize_pmid(_node_text(medline.find("./PMID")) if medline is not None else fallback_pmid) or _normalize_pmid(
        fallback_pmid
    )
    title = _normalize_text(_element_text(article_node.find("./ArticleTitle")) if article_node is not None else "")
    authors = " and ".join(
        name
        for name in (_pubmed_author_name(author) for author in article.findall(".//AuthorList/Author"))
        if name
    )
    journal = _normalize_text(_node_text(article.find(".//Journal/Title")))
    year = _pubmed_article_year(article)
    abstract = _pubmed_abstract_text(article)
    doi = _pubmed_article_identifier(article, "doi")
    # PMC ids live in <PubmedData>, not in the article body.
    pmcid = _pubmed_article_identifier(pubmed_data, "pmc")
    fields: dict[str, str] = {}
    if title:
        fields["title"] = title
    if authors:
        fields["author"] = authors
    if year:
        fields["year"] = year
    if journal:
        fields["journal"] = journal
    if abstract:
        fields["abstract"] = abstract
    if doi:
        fields["doi"] = doi
    if pmid:
        fields["pmid"] = pmid
    if pmcid:
        fields["pmcid"] = pmcid
        fields["url"] = f"https://pmc.ncbi.nlm.nih.gov/articles/{pmcid}/"
    elif pmid:
        fields["url"] = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"
    citation_key = _pubmed_citation_key(doi, pmid, authors, year, title)
    return BibEntry(entry_type="article", citation_key=citation_key, fields=fields)
def _merge_pubmed_summary_into_fields(fields: dict[str, str], summary: dict, fallback_pmid: str) -> None:
    """Copy summary-derived values into *fields* without overwriting existing ones."""
    supplemental = _pubmed_summary_fields(summary, fallback_pmid)
    for key in supplemental:
        value = supplemental[key]
        if value and not fields.get(key):
            fields[key] = value
def _pubmed_summary_fields(summary: dict, fallback_pmid: str) -> dict[str, str]:
    """Translate an esummary JSON record into BibEntry-style fields.

    Only non-empty values are emitted. The URL prefers the PMC page when
    a PMCID is available, else the PubMed page for the PMID.
    """
    pmid = _normalize_pmid(str(summary.get("uid") or fallback_pmid))
    title = _normalize_text(str(summary.get("title") or ""))
    year = _pubmed_year_from_text(str(summary.get("pubdate") or ""))
    journal = _normalize_text(str(summary.get("fulljournalname") or ""))
    authors = " and ".join(
        name
        for name in (
            _normalize_person_display_name(str(author.get("name") or ""))
            for author in summary.get("authors", [])
        )
        if name
    )
    doi = ""
    pmcid = ""
    # Scan alternate identifiers; on duplicates the last DOI/PMC value wins.
    for article_id in summary.get("articleids", []) or []:
        id_type = str(article_id.get("idtype") or "").lower()
        value = str(article_id.get("value") or "")
        if id_type == "doi" and value:
            doi = value
        elif id_type in {"pmc", "pmcid"} and value:
            pmcid = value
    fields: dict[str, str] = {}
    if title:
        fields["title"] = title
    if authors:
        fields["author"] = authors
    if year:
        fields["year"] = year
    if journal:
        fields["journal"] = journal
    if doi:
        fields["doi"] = doi
    if pmid:
        fields["pmid"] = pmid
    if pmcid:
        fields["pmcid"] = pmcid
        fields["url"] = f"https://pmc.ncbi.nlm.nih.gov/articles/{pmcid}/"
    elif pmid:
        fields["url"] = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"
    return fields
def _pubmed_author_name(author: ET.Element) -> str:
    """Format one ``<Author>`` element as 'Family, Given'.

    Collective (group) names take precedence. When only initials are
    available they are normalized to dotted form ('J. R.').
    """
    group_name = _normalize_text(_node_text(author.find("./CollectiveName")))
    if group_name:
        return group_name
    last = _normalize_text(_node_text(author.find("./LastName")))
    fore = _normalize_text(_node_text(author.find("./ForeName")))
    raw_initials = _normalize_text(_node_text(author.find("./Initials")))
    if last and fore:
        return f"{last}, {fore}"
    if last and raw_initials:
        dotted = " ".join(letter + "." for letter in re.findall(r"[A-Za-z]", raw_initials))
        if dotted:
            return f"{last}, {dotted}"
        return last
    return last or fore
def _pubmed_article_year(article: ET.Element) -> str:
    """Extract a publication year from an article element.

    Explicit ``<Year>`` nodes are tried first; failing that, a year is
    parsed out of free-text ``<MedlineDate>`` nodes. Empty string when
    nothing matches.
    """
    explicit_paths = (
        ".//JournalIssue/PubDate/Year",
        ".//ArticleDate/Year",
        ".//PubDate/Year",
    )
    for path in explicit_paths:
        value = _node_text(article.find(path))
        if value:
            return value
    freetext_paths = (
        ".//JournalIssue/PubDate/MedlineDate",
        ".//PubDate/MedlineDate",
    )
    for path in freetext_paths:
        parsed = _pubmed_year_from_text(_node_text(article.find(path)))
        if parsed:
            return parsed
    return ""
def _pubmed_year_from_text(value: str) -> str:
match = re.search(r"\b(1[6-9]\d{2}|20\d{2}|21\d{2})\b", value)
return match.group(1) if match else ""
def _pubmed_abstract_text(article: ET.Element) -> str:
    """Join all ``<AbstractText>`` sections into one string.

    Labeled sections are rendered as 'Label: text'; empty sections are
    skipped.
    """
    sections: list[str] = []
    for section in article.findall(".//Abstract/AbstractText"):
        body = _normalize_text(_element_text(section))
        if not body:
            continue
        heading = _normalize_text(section.attrib.get("Label", ""))
        if heading:
            sections.append(f"{heading}: {body}")
        else:
            sections.append(body)
    return " ".join(sections)
def _pubmed_article_identifier(root: ET.Element | None, identifier_type: str) -> str:
    """Find an identifier of *identifier_type* (e.g. 'doi', 'pmc') under *root*.

    ``<ArticleId>`` elements are checked first; for DOIs, ``<ELocationID>``
    elements serve as a fallback. Empty string when nothing matches.
    """
    if root is None:
        return ""
    wanted = identifier_type.lower()
    for candidate in root.findall(".//ArticleId"):
        if str(candidate.attrib.get("IdType") or "").lower() == wanted:
            return _normalize_text(_element_text(candidate))
    if wanted != "doi":
        return ""
    for candidate in root.findall(".//ELocationID"):
        if str(candidate.attrib.get("EIdType") or "").lower() == "doi":
            return _normalize_text(_element_text(candidate))
    return ""
def _pubmed_citation_key(doi: str, pmid: str, authors: str, year: str, title: str) -> str:
if doi:
suffix = re.sub(r"[^A-Za-z0-9]+", "", doi).lower()
return f"doi{suffix}"
if pmid:
return f"pmid{pmid}"
return _make_resolution_key(authors or "pubmed", year or "n.d.", title or "untitled")
def _element_text(node: ET.Element | None) -> str:
if node is None:
return ""
return " ".join("".join(node.itertext()).split())
def _datacite_work_to_entry(data: dict) -> BibEntry:
attributes = data.get("attributes", {})
doi = str(attributes.get("doi") or "")

View File

@ -149,6 +149,20 @@ class BibliographyVerifier:
input_type=input_type,
input_key=input_key,
)
if source_entry is not None and source_entry.fields.get("pmid"):
direct = self.resolver.resolve_pmid(source_entry.fields["pmid"])
if direct is not None:
return VerificationResult(
query=query,
context=context,
status="exact",
confidence=1.0,
entry=direct.entry,
source_label=direct.source_label,
alternates=[],
input_type=input_type,
input_key=input_key,
)
candidate_limit = max(1, limit)
candidates = self._collect_candidates(
@ -209,6 +223,7 @@ class BibliographyVerifier:
("crossref", self.resolver.search_crossref(search_title, limit=limit)),
("openalex", self.resolver.search_openalex(search_title, limit=limit)),
("datacite", self.resolver.search_datacite(search_title, limit=limit)),
("pubmed", self.resolver.search_pubmed(search_title, limit=limit)),
):
for entry in source_entries:
signature = _candidate_signature(entry)

View File

@ -34,6 +34,7 @@ def test_bootstrap_from_topic_only():
try:
bootstrapper = Bootstrapper()
bootstrapper.resolver.search_openalex = lambda topic, limit=5: [] # type: ignore[method-assign]
bootstrapper.resolver.search_pubmed = lambda topic, limit=5: [] # type: ignore[method-assign]
bootstrapper.resolver.search_crossref = lambda topic, limit=5: [] # type: ignore[method-assign]
bootstrapper.resolver.search_datacite = lambda topic, limit=5: [ # type: ignore[method-assign]
__import__("citegeist").BibEntry(
@ -139,6 +140,7 @@ def test_bootstrap_ranks_and_deduplicates_topic_candidates():
fields={"title": "Graph Topic Ranking", "abstract": "graph topic graph"},
)
]
bootstrapper.resolver.search_pubmed = lambda topic, limit=5: [] # type: ignore[method-assign]
bootstrapper.resolver.search_crossref = lambda topic, limit=5: [ # type: ignore[method-assign]
BibEntry(
entry_type="article",
@ -172,6 +174,7 @@ def test_bootstrap_preview_does_not_write_to_database():
bootstrapper.resolver.search_openalex = lambda topic, limit=5: [ # type: ignore[method-assign]
BibEntry(entry_type="article", citation_key="preview2024graph", fields={"title": "Preview Graph Topic"})
]
bootstrapper.resolver.search_pubmed = lambda topic, limit=5: [] # type: ignore[method-assign]
bootstrapper.resolver.search_crossref = lambda topic, limit=5: [] # type: ignore[method-assign]
bootstrapper.resolver.search_datacite = lambda topic, limit=5: [] # type: ignore[method-assign]
@ -194,6 +197,7 @@ def test_bootstrap_topic_commit_limit_restricts_persisted_candidates():
BibEntry(entry_type="article", citation_key="rank1", fields={"title": "Graph Topic One"}),
BibEntry(entry_type="article", citation_key="rank2", fields={"title": "Graph Topic Two"}),
]
bootstrapper.resolver.search_pubmed = lambda topic, limit=5: [] # type: ignore[method-assign]
bootstrapper.resolver.search_crossref = lambda topic, limit=5: [] # type: ignore[method-assign]
bootstrapper.resolver.search_datacite = lambda topic, limit=5: [] # type: ignore[method-assign]
bootstrapper.crossref_expander.expand_entry_references = lambda _store, _key: [] # type: ignore[method-assign]
@ -227,6 +231,7 @@ def test_bootstrap_topic_candidates_are_attached_to_topic():
fields={"title": "Graph Topic Result", "year": "2024"},
)
]
bootstrapper.resolver.search_pubmed = lambda topic, limit=5: [] # type: ignore[method-assign]
bootstrapper.resolver.search_crossref = lambda topic, limit=5: [] # type: ignore[method-assign]
bootstrapper.resolver.search_datacite = lambda topic, limit=5: [] # type: ignore[method-assign]
bootstrapper.crossref_expander.expand_entry_references = lambda _store, _key: [] # type: ignore[method-assign]
@ -278,6 +283,7 @@ def test_bootstrap_topic_commit_requires_title_anchor():
},
),
]
bootstrapper.resolver.search_pubmed = lambda topic, limit=5: [] # type: ignore[method-assign]
bootstrapper.resolver.search_crossref = lambda topic, limit=5: [] # type: ignore[method-assign]
bootstrapper.resolver.search_datacite = lambda topic, limit=5: [] # type: ignore[method-assign]
bootstrapper.crossref_expander.expand_entry_references = lambda _store, _key: [] # type: ignore[method-assign]
@ -482,6 +488,7 @@ def test_bootstrap_preview_uses_topic_commit_limit_when_larger_than_topic_limit(
)
for index in range(1, 8)
][:limit]
bootstrapper.resolver.search_pubmed = lambda topic, limit=5: [] # type: ignore[method-assign]
bootstrapper.resolver.search_crossref = lambda topic, limit=5: [] # type: ignore[method-assign]
bootstrapper.resolver.search_datacite = lambda topic, limit=5: [] # type: ignore[method-assign]

View File

@ -8,6 +8,7 @@ from citegeist.resolve import (
_crossref_message_to_entry,
_datacite_work_to_entry,
_openalex_work_to_entry,
_pubmed_article_to_entry,
merge_entries_with_conflicts,
merge_entries,
)
@ -88,6 +89,52 @@ def test_arxiv_atom_entry_to_bib_maps_basic_fields():
assert entry.fields["doi"] == "10.1000/arxiv-example"
def test_pubmed_article_to_entry_maps_basic_fields():
    # Maps a representative efetch <PubmedArticle> onto title/author/journal/
    # year, identifier (doi/pmid/pmcid), and labeled-abstract fields.
    xml = ET.fromstring(
        """
<PubmedArticle>
<MedlineCitation>
<PMID>12345678</PMID>
<Article>
<ArticleTitle>PubMed Resolved Work</ArticleTitle>
<Abstract>
<AbstractText Label="Background">Evidence summary.</AbstractText>
<AbstractText>Second paragraph.</AbstractText>
</Abstract>
<Journal>
<JournalIssue>
<PubDate><Year>2021</Year></PubDate>
</JournalIssue>
<Title>Journal of Evidence</Title>
</Journal>
<AuthorList>
<Author><LastName>Smith</LastName><ForeName>Jane</ForeName></Author>
</AuthorList>
<ELocationID EIdType="doi">10.1000/pubmed-example</ELocationID>
</Article>
</MedlineCitation>
<PubmedData>
<ArticleIdList>
<ArticleId IdType="pubmed">12345678</ArticleId>
<ArticleId IdType="pmc">PMC123456</ArticleId>
</ArticleIdList>
</PubmedData>
</PubmedArticle>
"""
    )
    entry = _pubmed_article_to_entry(xml)
    # DOI-based key: "doi" + alphanumerics of the DOI, lowercased.
    assert entry.citation_key == "doi101000pubmedexample"
    assert entry.fields["title"] == "PubMed Resolved Work"
    assert entry.fields["author"] == "Smith, Jane"
    assert entry.fields["journal"] == "Journal of Evidence"
    assert entry.fields["year"] == "2021"
    assert entry.fields["pmid"] == "12345678"
    assert entry.fields["pmcid"] == "PMC123456"
    # Labeled sections are prefixed with "Label: " and joined by spaces.
    assert entry.fields["abstract"] == "Background: Evidence summary. Second paragraph."
def test_merge_entries_prefers_existing_values_and_adds_missing_fields():
base = BibEntry(
entry_type="article",
@ -209,6 +256,35 @@ def test_resolver_tries_doi_before_dblp():
]
def test_resolver_tries_pmid_before_dblp():
    # PMID resolution must be attempted before DBLP when an entry carries both.
    resolver = MetadataResolver()
    calls: list[tuple[str, str]] = []

    def make_fake(source: str):
        # Each fake records its invocation and resolves nothing,
        # forcing the resolver to continue down the identifier chain.
        def fake(value: str):
            calls.append((source, value))
            return None

        return fake

    resolver.resolve_pmid = make_fake("pmid")  # type: ignore[method-assign]
    resolver.resolve_dblp = make_fake("dblp")  # type: ignore[method-assign]
    resolver.resolve_entry(
        BibEntry(
            entry_type="article",
            citation_key="smith2024graphs",
            fields={"pmid": "12345678", "dblp": "conf/test/Smith24"},
        )
    )
    assert calls == [
        ("pmid", "12345678"),
        ("dblp", "conf/test/Smith24"),
    ]
def test_openalex_work_to_entry_maps_basic_fields():
entry = _openalex_work_to_entry(
{

View File

@ -36,6 +36,37 @@ def test_verifier_uses_direct_doi_resolution_for_bib_entries():
assert result.source_label == "crossref:doi:10.1000/example"
def test_verifier_uses_direct_pmid_resolution_for_bib_entries():
    # An entry carrying a PMID should be resolved directly and reported as exact.
    verifier = BibliographyVerifier()

    def fake_resolve_pmid(value: str) -> Resolution:
        resolved = BibEntry(
            entry_type="article",
            citation_key="pmid12345678",
            fields={
                "author": "Smith, Jane",
                "title": "Resolved PubMed Work",
                "year": "2024",
                "pmid": value,
            },
        )
        return Resolution(
            entry=resolved,
            source_type="resolver",
            source_label=f"pubmed:pmid:{value}",
        )

    verifier.resolver.resolve_pmid = fake_resolve_pmid  # type: ignore[method-assign]
    result = verifier.verify_bib_entry(
        BibEntry(
            entry_type="misc",
            citation_key="seed2024",
            fields={"title": "Rough Work", "pmid": "12345678"},
        )
    )
    assert result.status == "exact"
    assert result.confidence == 1.0
    assert result.entry.fields["title"] == "Resolved PubMed Work"
    assert result.source_label == "pubmed:pmid:12345678"
def test_verifier_scores_and_sorts_search_candidates():
verifier = BibliographyVerifier()
verifier.resolver.search_crossref = lambda title, limit=5: [ # type: ignore[method-assign]
@ -61,6 +92,7 @@ def test_verifier_scores_and_sorts_search_candidates():
]
verifier.resolver.search_openalex = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.resolver.search_datacite = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.resolver.search_pubmed = lambda title, limit=5: [] # type: ignore[method-assign]
result = verifier.verify_string('"Graph-first bibliography augmentation" Smith 2024')
@ -74,6 +106,7 @@ def test_verification_result_to_bib_entry_contains_audit_fields():
verifier.resolver.search_crossref = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.resolver.search_openalex = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.resolver.search_datacite = lambda title, limit=5: [] # type: ignore[method-assign]
verifier.resolver.search_pubmed = lambda title, limit=5: [] # type: ignore[method-assign]
result = verifier._verify_query( # type: ignore[attr-defined]
{"title": "Missing Work", "authors": [], "year": "", "venue": ""},