Add ranked claim support analysis and demo UI
This commit is contained in:
parent
89bc56a7aa
commit
5cde9e54a6
|
|
@ -51,6 +51,7 @@ The initial repo includes:
|
||||||
- staged plaintext reference extraction that now preserves more structured metadata from legacy references, including year suffixes, identifiers, volume/issue/pages, and thesis/report/web-style venue hints;
|
- staged plaintext reference extraction that now preserves more structured metadata from legacy references, including year suffixes, identifiers, volume/issue/pages, and thesis/report/web-style venue hints;
|
||||||
- a reference-extraction backend seam with the local `heuristic` parser as the default implementation, so optional external backends can be added later without changing the core extract workflow;
|
- a reference-extraction backend seam with the local `heuristic` parser as the default implementation, so optional external backends can be added later without changing the core extract workflow;
|
||||||
- standalone verification and disambiguation of free-text references or partial BibTeX into auditable BibTeX/JSON results with `x_status`, `x_confidence`, `x_source`, `x_query`, and alternate-candidate traces;
|
- standalone verification and disambiguation of free-text references or partial BibTeX into auditable BibTeX/JSON results with `x_status`, `x_confidence`, `x_source`, `x_query`, and alternate-candidate traces;
|
||||||
|
- a first-pass claim-support workflow that can scan citation-bearing claim sentences in a text excerpt and suggest additional supporting references not already parsed from the excerpt's reference list;
|
||||||
- identifier-first metadata resolution for DOI, PMID/PubMed, OpenAlex, DBLP, arXiv, and DataCite-backed entries, with OpenAlex/DataCite/PubMed title-search fallback;
|
- identifier-first metadata resolution for DOI, PMID/PubMed, OpenAlex, DBLP, arXiv, and DataCite-backed entries, with OpenAlex/DataCite/PubMed title-search fallback;
|
||||||
- local citation-graph traversal over stored `cites`, `cited_by`, and `crossref` edges;
|
- local citation-graph traversal over stored `cites`, `cited_by`, and `crossref` edges;
|
||||||
- Crossref- and OpenAlex-backed graph expansion that materializes draft related works and edge provenance;
|
- Crossref- and OpenAlex-backed graph expansion that materializes draft related works and edge provenance;
|
||||||
|
|
@ -174,6 +175,7 @@ PYTHONPATH=src .venv/bin/python -m citegeist compare-extract references.txt --ba
|
||||||
PYTHONPATH=src .venv/bin/python -m citegeist verify --string '"Graph-first bibliography augmentation" Smith 2024' --context "citation graphs" --format json
|
PYTHONPATH=src .venv/bin/python -m citegeist verify --string '"Graph-first bibliography augmentation" Smith 2024' --context "citation graphs" --format json
|
||||||
PYTHONPATH=src .venv/bin/python -m citegeist verify --string 'Evans 1960' --context "bottlenose dolphin echolocation" --llm --llm-base-url http://localhost:11434 --llm-model qwen3 --llm-role both --format json
|
PYTHONPATH=src .venv/bin/python -m citegeist verify --string 'Evans 1960' --context "bottlenose dolphin echolocation" --llm --llm-base-url http://localhost:11434 --llm-model qwen3 --llm-role both --format json
|
||||||
PYTHONPATH=src .venv/bin/python -m citegeist verify --bib draft.bib --output verified.bib
|
PYTHONPATH=src .venv/bin/python -m citegeist verify --bib draft.bib --output verified.bib
|
||||||
|
PYTHONPATH=src .venv/bin/python -m citegeist support-claims paper_excerpt.txt --context "artificial life"
|
||||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve smith2024graphs
|
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve smith2024graphs
|
||||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --preview --limit 25
|
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --preview --limit 25
|
||||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --all-misc --limit 25
|
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --all-misc --limit 25
|
||||||
|
|
|
||||||
|
|
@ -208,6 +208,15 @@ Exit criteria:
|
||||||
Status:
|
Status:
|
||||||
Early but serviceable. SQLite FTS covers the basic local-search path, but retrieval benchmarking, saved search workflows, and optional semantic ranking remain future work.
|
Early but serviceable. SQLite FTS covers the basic local-search path, but retrieval benchmarking, saved search workflows, and optional semantic ranking remain future work.
|
||||||
|
|
||||||
|
Note:
|
||||||
|
The new `support-claims` feature is an early bridge from bibliography work into
|
||||||
|
claim-oriented literature assistance. Its current scope is intentionally narrow:
|
||||||
|
segment citation-bearing claim sentences from a text excerpt, parse already
|
||||||
|
listed references when possible, and suggest additional candidate support using
|
||||||
|
the existing verifier/resolver stack. The next quality steps are better claim
|
||||||
|
segmentation, stronger deduping against already-used sources, and UI review
|
||||||
|
surfaces for per-claim suggestions.
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
The repository now has a small app-facing JSON adapter surface, a lightweight local HTTP bridge, and a static literature-explorer demo shell. That is enough for a browser or desktop-web shell to drive topic discovery, topic expansion, extraction, verification, entry inspection, and lightweight graph exploration against one local database. It is still a demo boundary rather than a full multi-user application or long-running service architecture.
|
The repository now has a small app-facing JSON adapter surface, a lightweight local HTTP bridge, and a static literature-explorer demo shell. That is enough for a browser or desktop-web shell to drive topic discovery, topic expansion, extraction, verification, entry inspection, and lightweight graph exploration against one local database. It is still a demo boundary rather than a full multi-user application or long-running service architecture.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -405,6 +405,62 @@
|
||||||
color: var(--ink);
|
color: var(--ink);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.claim-stack {
|
||||||
|
display: grid;
|
||||||
|
gap: 0.85rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.claim-card {
|
||||||
|
padding: 0.95rem 1rem;
|
||||||
|
border-radius: 18px;
|
||||||
|
background: rgba(255, 255, 255, 0.78);
|
||||||
|
border: 1px solid rgba(73, 57, 35, 0.11);
|
||||||
|
display: grid;
|
||||||
|
gap: 0.6rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.claim-score {
|
||||||
|
display: inline-flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.45rem;
|
||||||
|
font-size: 0.78rem;
|
||||||
|
font-weight: 700;
|
||||||
|
letter-spacing: 0.03em;
|
||||||
|
color: #6b230f;
|
||||||
|
background: #f4dfd3;
|
||||||
|
border: 1px solid rgba(141, 63, 45, 0.16);
|
||||||
|
border-radius: 999px;
|
||||||
|
padding: 0.3rem 0.62rem;
|
||||||
|
width: fit-content;
|
||||||
|
}
|
||||||
|
|
||||||
|
.claim-text {
|
||||||
|
color: var(--ink);
|
||||||
|
line-height: 1.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
.claim-note {
|
||||||
|
font-size: 0.88rem;
|
||||||
|
color: var(--muted);
|
||||||
|
}
|
||||||
|
|
||||||
|
.claim-ref-list {
|
||||||
|
display: grid;
|
||||||
|
gap: 0.55rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.claim-ref {
|
||||||
|
padding: 0.75rem 0.85rem;
|
||||||
|
border-radius: 14px;
|
||||||
|
background: rgba(245, 239, 229, 0.68);
|
||||||
|
border: 1px solid rgba(73, 57, 35, 0.09);
|
||||||
|
}
|
||||||
|
|
||||||
|
.claim-ref strong {
|
||||||
|
display: block;
|
||||||
|
margin-bottom: 0.15rem;
|
||||||
|
}
|
||||||
|
|
||||||
.endpoint-card {
|
.endpoint-card {
|
||||||
border-radius: 18px;
|
border-radius: 18px;
|
||||||
border: 1px solid rgba(73, 57, 35, 0.11);
|
border: 1px solid rgba(73, 57, 35, 0.11);
|
||||||
|
|
@ -641,6 +697,31 @@
|
||||||
<button id="verify-button" class="secondary">Verify String</button>
|
<button id="verify-button" class="secondary">Verify String</button>
|
||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
|
<section class="panel card">
|
||||||
|
<h2>Claim Support</h2>
|
||||||
|
<label>
|
||||||
|
Claim-Like Excerpt
|
||||||
|
<textarea id="claim-support-text">Computational research touching on movement of agents spans many different fields. Movement may not be modeled at all, but simply assigned a cost value, as in work in artificial neural systems applied to the traveling salesman problem [1]. Our research takes an approach at an intermediate level, seeking to elucidate how evolutionary processes can result in individual control of existing movement capabilities in order to intelligently exploit environmental resources.</textarea>
|
||||||
|
</label>
|
||||||
|
<div class="row-3">
|
||||||
|
<label>
|
||||||
|
Context
|
||||||
|
<input id="claim-support-context" value="artificial life" />
|
||||||
|
</label>
|
||||||
|
<label>
|
||||||
|
Max Claims
|
||||||
|
<input id="claim-support-max-claims" type="number" min="1" value="5" />
|
||||||
|
</label>
|
||||||
|
<label>
|
||||||
|
Min Claim Chars
|
||||||
|
<input id="claim-support-min-chars" type="number" min="20" value="80" />
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
<div class="toolbar">
|
||||||
|
<button id="claim-support-button" class="primary full">Suggest Support</button>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
</div>
|
</div>
|
||||||
</aside>
|
</aside>
|
||||||
|
|
||||||
|
|
@ -696,6 +777,11 @@
|
||||||
</section>
|
</section>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
|
<section class="panel card">
|
||||||
|
<h2>Claim Support Review</h2>
|
||||||
|
<div id="claim-support-output" class="empty">Run claim support to rank support-worthy assertions and inspect suggested references.</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
<section class="panel card">
|
<section class="panel card">
|
||||||
<h2>Graph View</h2>
|
<h2>Graph View</h2>
|
||||||
<div id="graph-output" class="empty">Load a topic to view a small local network around its first few entries.</div>
|
<div id="graph-output" class="empty">Load a topic to view a small local network around its first few entries.</div>
|
||||||
|
|
@ -796,6 +882,7 @@
|
||||||
searchResults: document.getElementById("search-results"),
|
searchResults: document.getElementById("search-results"),
|
||||||
graphOutput: document.getElementById("graph-output"),
|
graphOutput: document.getElementById("graph-output"),
|
||||||
extractVerifyOutput: document.getElementById("extract-verify-output"),
|
extractVerifyOutput: document.getElementById("extract-verify-output"),
|
||||||
|
claimSupportOutput: document.getElementById("claim-support-output"),
|
||||||
activityLog: document.getElementById("activity-log"),
|
activityLog: document.getElementById("activity-log"),
|
||||||
metricTopicCount: document.getElementById("metric-topic-count"),
|
metricTopicCount: document.getElementById("metric-topic-count"),
|
||||||
metricEntryCount: document.getElementById("metric-entry-count"),
|
metricEntryCount: document.getElementById("metric-entry-count"),
|
||||||
|
|
@ -835,6 +922,11 @@
|
||||||
extractText: document.getElementById("extract-text"),
|
extractText: document.getElementById("extract-text"),
|
||||||
extractButton: document.getElementById("extract-button"),
|
extractButton: document.getElementById("extract-button"),
|
||||||
verifyButton: document.getElementById("verify-button"),
|
verifyButton: document.getElementById("verify-button"),
|
||||||
|
claimSupportText: document.getElementById("claim-support-text"),
|
||||||
|
claimSupportContext: document.getElementById("claim-support-context"),
|
||||||
|
claimSupportMaxClaims: document.getElementById("claim-support-max-claims"),
|
||||||
|
claimSupportMinChars: document.getElementById("claim-support-min-chars"),
|
||||||
|
claimSupportButton: document.getElementById("claim-support-button"),
|
||||||
};
|
};
|
||||||
|
|
||||||
els.serverUrl.value = state.bridgeUrl;
|
els.serverUrl.value = state.bridgeUrl;
|
||||||
|
|
@ -1057,6 +1149,45 @@
|
||||||
els.extractVerifyOutput.textContent = JSON.stringify(payload, null, 2);
|
els.extractVerifyOutput.textContent = JSON.stringify(payload, null, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Render the claim-support analysis payload into the review panel.
// Expects payload.suggestions as produced by the `support_claims` bridge
// method: each suggestion carries claim_text, needs_support_score,
// existing_citation_markers, an optional note, and suggested_references.
// All user/model text is passed through escapeHtml before insertion.
function renderClaimSupport(payload) {
  const suggestions = payload?.suggestions || [];
  if (!suggestions.length) {
    renderEmpty(els.claimSupportOutput, "No ranked support suggestions yet. Try a longer excerpt or a different context phrase.");
    return;
  }
  // Swap the placeholder "empty" styling for the ranked-card layout.
  els.claimSupportOutput.className = "claim-stack";
  // NOTE: no comments inside the template literal — they would become markup.
  els.claimSupportOutput.innerHTML = `
    <div class="summary-box">
      <strong>Claim Support Summary</strong>
      <p>${suggestions.length} ranked claims from ${payload.claim_count || 0} extracted candidates · ${payload.existing_reference_count || 0} parsed existing references.</p>
      <p>Claims are ordered by <code>needs_support_score</code>, so uncited or under-supported assertions appear first.</p>
    </div>
    ${suggestions.map((suggestion) => `
      <div class="claim-card">
        <span class="claim-score">Needs Support ${Number(suggestion.needs_support_score ?? 0).toFixed(3)}</span>
        <div class="claim-text">${escapeHtml(suggestion.claim_text || "")}</div>
        <div class="pill-row">
          ${(suggestion.existing_citation_markers || []).map((marker) => `<span class="pill">${escapeHtml(marker)}</span>`).join("") || '<span class="pill">no inline citations detected</span>'}
        </div>
        ${suggestion.note ? `<div class="claim-note">${escapeHtml(suggestion.note)}</div>` : ""}
        <div class="claim-ref-list">
          ${(suggestion.suggested_references || []).map((reference) => `
            <div class="claim-ref">
              <strong>${escapeHtml(reference.title || reference.citation_key || "candidate")}</strong>
              <p>${escapeHtml(reference.authors || "Unknown authors")} · ${escapeHtml(reference.year || "n.d.")} · score ${Number(reference.score ?? 0).toFixed(3)}</p>
              <div class="pill-row">
                ${reference.journal ? `<span class="pill">${escapeHtml(reference.journal)}</span>` : ""}
                ${reference.doi ? `<span class="pill">${escapeHtml(reference.doi)}</span>` : ""}
                ${reference.source_label ? `<span class="pill">${escapeHtml(reference.source_label)}</span>` : ""}
              </div>
            </div>
          `).join("")}
        </div>
      </div>
    `).join("")}
  `;
}
|
||||||
|
|
||||||
function renderExpandSummary(payload) {
|
function renderExpandSummary(payload) {
|
||||||
if (!els.expandSummary) return;
|
if (!els.expandSummary) return;
|
||||||
const results = payload?.results || [];
|
const results = payload?.results || [];
|
||||||
|
|
@ -1320,6 +1451,29 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Click handler for the "Suggest Support" button: sends the excerpt and
// the form's tuning knobs to the bridge's `support_claims` method, then
// renders the ranked result. Requires an established client connection.
async function runClaimSupport() {
  if (!state.client) {
    setStatus("Connect to the server first.", "error");
    return;
  }
  // Disable the button while the request is in flight.
  setBusy(els.claimSupportButton, true);
  try {
    const payload = await state.client.supportClaims(els.claimSupportText.value, {
      context: els.claimSupportContext.value.trim(),
      // Per-claim suggestion limit is fixed here; only claim count and
      // minimum claim length are user-tunable from the form.
      limit: 5,
      max_claims: Number(els.claimSupportMaxClaims.value || 5),
      min_claim_chars: Number(els.claimSupportMinChars.value || 80),
    });
    renderClaimSupport(payload);
    setLastOp("support_claims");
    logActivity("support_claims", payload);
  } catch (error) {
    setStatus(String(error.message || error), "error");
  } finally {
    // Always re-enable the button, success or failure.
    setBusy(els.claimSupportButton, false);
  }
}
|
||||||
|
|
||||||
async function exportTopicBibtex(topicSlug) {
|
async function exportTopicBibtex(topicSlug) {
|
||||||
if (!state.client || !topicSlug) {
|
if (!state.client || !topicSlug) {
|
||||||
setStatus("Connect to the server first.", "error");
|
setStatus("Connect to the server first.", "error");
|
||||||
|
|
@ -1408,6 +1562,7 @@
|
||||||
els.searchButton.addEventListener("click", runSearch);
|
els.searchButton.addEventListener("click", runSearch);
|
||||||
els.extractButton.addEventListener("click", runExtract);
|
els.extractButton.addEventListener("click", runExtract);
|
||||||
els.verifyButton.addEventListener("click", runVerify);
|
els.verifyButton.addEventListener("click", runVerify);
|
||||||
|
els.claimSupportButton.addEventListener("click", runClaimSupport);
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,9 @@ export function createLiteratureExplorerClient(bridge) {
|
||||||
verifyStrings(values, options = {}) {
|
verifyStrings(values, options = {}) {
|
||||||
return bridge.call("verify_strings", { values, ...options });
|
return bridge.call("verify_strings", { values, ...options });
|
||||||
},
|
},
|
||||||
|
supportClaims(text, options = {}) {
|
||||||
|
return bridge.call("support_claims", { text, ...options });
|
||||||
|
},
|
||||||
verifyBibtex(bibtexText, options = {}) {
|
verifyBibtex(bibtexText, options = {}) {
|
||||||
return bridge.call("verify_bibtex", { bibtex_text: bibtexText, ...options });
|
return bridge.call("verify_bibtex", { bibtex_text: bibtexText, ...options });
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ from dataclasses import asdict
|
||||||
|
|
||||||
from .bibtex import BibEntry, parse_bibtex, render_bibtex
|
from .bibtex import BibEntry, parse_bibtex, render_bibtex
|
||||||
from .bootstrap import Bootstrapper
|
from .bootstrap import Bootstrapper
|
||||||
|
from .claim_support import analyze_support_gaps
|
||||||
from .expand import TopicExpander
|
from .expand import TopicExpander
|
||||||
from .extract import extract_references
|
from .extract import extract_references
|
||||||
from .storage import BibliographyStore
|
from .storage import BibliographyStore
|
||||||
|
|
@ -42,6 +43,7 @@ class LiteratureExplorerApi:
|
||||||
"expand_topic",
|
"expand_topic",
|
||||||
"extract_text",
|
"extract_text",
|
||||||
"verify_strings",
|
"verify_strings",
|
||||||
|
"support_claims",
|
||||||
"graph",
|
"graph",
|
||||||
],
|
],
|
||||||
"preview_operations": ["bootstrap", "expand_topic"],
|
"preview_operations": ["bootstrap", "expand_topic"],
|
||||||
|
|
@ -216,6 +218,26 @@ class LiteratureExplorerApi:
|
||||||
"results": [_verification_payload(result) for result in results],
|
"results": [_verification_payload(result) for result in results],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def support_claims(
    self,
    text: str,
    *,
    context: str = "",
    limit: int = 5,
    max_claims: int = 8,
    min_claim_chars: int = 90,
) -> dict[str, object]:
    """Run claim-support analysis over *text* and return a JSON-ready payload.

    Thin wrapper around :func:`analyze_support_gaps`, using this API's
    shared verifier instance. The supplied *context* string is echoed back
    into the payload so callers can correlate request and response.

    Args:
        text: Excerpt containing claim sentences and (optionally) a
            reference list.
        context: Topical hint forwarded to the verifier.
        limit: Per-claim cap on suggested references.
        max_claims: Maximum number of claims to extract.
        min_claim_chars: Minimum length for a merged claim to be kept.
    """
    payload = analyze_support_gaps(
        text,
        verifier=self.verifier,
        context=context,
        limit=limit,
        max_claims=max_claims,
        min_claim_chars=min_claim_chars,
    )
    # Echo the request context into the response for client-side display.
    payload["context"] = context
    return payload
|
||||||
|
|
||||||
def verify_bibtex(self, bibtex_text: str, *, context: str = "", limit: int = 5) -> dict[str, object]:
|
def verify_bibtex(self, bibtex_text: str, *, context: str = "", limit: int = 5) -> dict[str, object]:
|
||||||
entries = parse_bibtex(bibtex_text)
|
entries = parse_bibtex(bibtex_text)
|
||||||
results = [self.verifier.verify_bib_entry(entry, context=context, limit=limit) for entry in entries]
|
results = [self.verifier.verify_bib_entry(entry, context=context, limit=limit) for entry in entries]
|
||||||
|
|
|
||||||
|
|
@ -94,6 +94,14 @@ class LiteratureExplorerAppServer:
|
||||||
context=str(params.get("context") or ""),
|
context=str(params.get("context") or ""),
|
||||||
limit=int(params.get("limit", 5)),
|
limit=int(params.get("limit", 5)),
|
||||||
)
|
)
|
||||||
|
if method == "support_claims":
|
||||||
|
return self.api.support_claims(
|
||||||
|
str(params.get("text") or ""),
|
||||||
|
context=str(params.get("context") or ""),
|
||||||
|
limit=int(params.get("limit", 5)),
|
||||||
|
max_claims=int(params.get("max_claims", 8)),
|
||||||
|
min_claim_chars=int(params.get("min_claim_chars", 90)),
|
||||||
|
)
|
||||||
if method == "verify_bibtex":
|
if method == "verify_bibtex":
|
||||||
return self.api.verify_bibtex(
|
return self.api.verify_bibtex(
|
||||||
str(params.get("bibtex_text") or ""),
|
str(params.get("bibtex_text") or ""),
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,307 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .verify import BibliographyVerifier
|
||||||
|
|
||||||
|
|
||||||
|
# Glyph used to flag claims in annotated excerpts; stripped before segmentation.
CLAIM_MARKER = "✅"
# Numeric inline citation, e.g. "[12]" — captures the bare number "12".
NUMERIC_CITATION_PATTERN = re.compile(r"\[(\d+)\]")
# Parenthetical author-year citation, e.g. "(Smith and Jones, 2020a)".
AUTHOR_YEAR_PAREN_PATTERN = re.compile(
    r"\(([A-Z][A-Za-z'’.-]+(?:\s+(?:and|&|et al\.?))?(?:\s+[A-Z][A-Za-z'’.-]+)*,?\s+\d{4}[a-z]?)\)"
)
# Narrative author-year citation, e.g. "Smith (2020)" — captures (author, year).
AUTHOR_YEAR_INLINE_PATTERN = re.compile(
    r"\b([A-Z][A-Za-z'’.-]+(?:\s+(?:and|&|et al\.?))?(?:\s+[A-Z][A-Za-z'’.-]+)*)\s*\((\d{4}[a-z]?)\)"
)
# Reference-list entry of the form "[[3]] Title ..." at the start of a line.
REFERENCE_ENTRY_PATTERN = re.compile(r"^\s*\[\[(\d+)\]\]\s*(.+)$", re.MULTILINE)
# Sentence boundary: terminal punctuation, whitespace, then an
# uppercase letter, digit, quote, or bracketed citation.
SENTENCE_SPLIT_PATTERN = re.compile(r'(?<=[.!?])\s+(?=[A-Z0-9"\[])')
# Numbered section headers such as "IV. Results" or "A. Methods".
SECTION_HEADER_PATTERN = re.compile(r"^(?:[IVX]+\.|[A-Z]\.)\s+[A-Z]")
# Openers that usually continue the preceding claim rather than start a new one.
CONTINUATION_START_PATTERN = re.compile(
    r"^(?:instead|rather|thus|therefore|however|moreover|further|furthermore|"
    r"because|given that|in most cases|for many purposes|these|this|such|it|they|"
    r"another|the same|that |those )",
    re.IGNORECASE,
)
# Vocabulary that signals an assertion worth citing. NOTE(review): this word
# list looks tuned to one demo domain (artificial life / agent movement) —
# consider generalizing before wider use.
CLAIM_SIGNAL_PATTERN = re.compile(
    r"\b(?:we|our|this|these|those|research|results?|findings?|analysis|approach|model(?:ing)?|"
    r"study|studies|work|movement|evolution(?:ary)?|agents?|organisms?|intelligence|behavior|"
    r"behaviour|environment(?:al)?|resource(?:s)?|strategy|strategies|generaliz(?:e|ation)|"
    r"suggest(?:s|ed)?|indicat(?:es|ed)|show(?:s|ed)?|demonstrat(?:e|es|ed)|permit(?:s|ted)?|"
    r"require(?:s|d)?|provide(?:s|d)?|span(?:s|ned)?|range(?:s|d)?|covers?|across|exploit(?:s|ed)?|"
    r"emerge(?:s|d)|evolved?|hypothesis|goal|question|capabilit(?:y|ies)|complex(?:ity)?|"
    r"resource peak|gradient ascent|optimal|random walk|turing-complete)\b",
    re.IGNORECASE,
)
# Line starts that mark headings or UI boilerplate — never claims.
NON_CLAIM_START_PATTERN = re.compile(
    r"^(?:abstract|introduction|methods|results|discussion|future work|conclusions?|references|"
    r"keywords?|fig\.|table\s|view\s+\d+|show\s+abstract|relevance:|optional|already cited|"
    r"new references found)",
    re.IGNORECASE,
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
class ClaimSupportSuggestion:
    """One ranked claim plus the candidate references suggested to support it."""

    # The (possibly merged) claim sentence(s) pulled from the excerpt.
    claim_text: str
    # Inline citation markers already present, e.g. "3" or "(Smith 2020)".
    existing_citation_markers: list[str]
    # Raw reference-list entries matched to those markers (may be empty).
    existing_reference_titles: list[str]
    # Serialized candidate entries proposed as additional support.
    suggested_references: list[dict[str, object]]
    # Higher means the claim looks more under-cited; see _score_claim_need.
    needs_support_score: float
    # Optional human-readable remark about the claim's existing citations.
    note: str | None = None

    def to_dict(self) -> dict[str, object]:
        """Return a JSON-serializable copy; list fields are shallow-copied
        and the score is rounded to three decimal places."""
        return {
            "claim_text": self.claim_text,
            "existing_citation_markers": list(self.existing_citation_markers),
            "existing_reference_titles": list(self.existing_reference_titles),
            "suggested_references": list(self.suggested_references),
            "needs_support_score": round(float(self.needs_support_score), 3),
            "note": self.note,
        }
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
class ClaimCandidate:
    """Intermediate claim record produced during sentence segmentation."""

    # Full claim text (one sentence, or several merged continuations).
    text: str
    # Inline citation markers found in the text, in first-seen order.
    citation_markers: list[str]
    # Heuristic under-citation score computed by _score_claim_need.
    needs_support_score: float
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_support_gaps(
    text: str,
    *,
    verifier: BibliographyVerifier | None = None,
    context: str = "",
    limit: int = 5,
    max_claims: int = 8,
    min_claim_chars: int = 90,
) -> dict[str, object]:
    """Scan *text* for claim sentences and suggest additional supporting references.

    Pipeline: parse the excerpt's reference list, segment claim candidates
    from the body, verify each claim's text against external sources via
    *verifier*, then keep only suggestions whose titles are not already in
    the reference list. Results are ranked most-under-supported first.

    Args:
        text: Excerpt, optionally followed by a "References" section.
        verifier: Verifier to query; a default one is built if omitted.
        context: Topical hint forwarded to the verifier.
        limit: Verifier result cap per claim.
        max_claims: Maximum claim candidates to process.
        min_claim_chars: Minimum merged-claim length to keep.

    Returns:
        Dict with claim_count, existing_reference_count, suggestion_count,
        and the serialized ranked suggestions.
    """
    verifier = verifier or BibliographyVerifier()
    existing_references = _extract_existing_references(text)
    # Normalized titles let us drop suggestions that merely re-find
    # references the excerpt already lists.
    existing_titles_normalized = {_normalize_title(title) for title in existing_references.values() if title}
    claims = _extract_claim_candidates(text, max_claims=max_claims, min_claim_chars=min_claim_chars)

    suggestions: list[ClaimSupportSuggestion] = []
    for claim in claims:
        # Titles the claim already cites, when its markers resolve to
        # parsed reference-list entries.
        referenced_titles = [
            existing_references[marker]
            for marker in claim.citation_markers
            if marker in existing_references and existing_references[marker]
        ]
        verification = verifier.verify_string(claim.text, context=context, limit=limit)
        # Flatten primary result + alternates into parallel candidate lists.
        candidates = [verification.entry, *[alt.entry for alt in verification.alternates]]
        sources = [verification.source_label, *[alt.source_label for alt in verification.alternates]]
        scores = [verification.confidence, *[alt.score for alt in verification.alternates]]

        rendered: list[dict[str, object]] = []
        seen_titles: set[str] = set()
        for entry, source_label, score in zip(candidates, sources, scores):
            title = str(entry.fields.get("title") or "").strip()
            normalized_title = _normalize_title(title)
            # Skip untitled hits, titles already in the excerpt's reference
            # list, and duplicates within this claim's own candidate set.
            if not title or normalized_title in existing_titles_normalized or normalized_title in seen_titles:
                continue
            seen_titles.add(normalized_title)
            rendered.append(
                {
                    "citation_key": entry.citation_key,
                    "entry_type": entry.entry_type,
                    "title": title,
                    "authors": str(entry.fields.get("author") or ""),
                    "year": str(entry.fields.get("year") or ""),
                    "doi": str(entry.fields.get("doi") or ""),
                    "journal": str(entry.fields.get("journal") or entry.fields.get("booktitle") or ""),
                    "source_label": source_label,
                    "score": round(float(score), 4),
                }
            )

        # Claims with no novel candidates are omitted entirely.
        if rendered:
            suggestions.append(
                ClaimSupportSuggestion(
                    claim_text=claim.text,
                    existing_citation_markers=claim.citation_markers,
                    existing_reference_titles=referenced_titles,
                    suggested_references=rendered,
                    needs_support_score=claim.needs_support_score,
                    note=_build_note(claim.citation_markers, referenced_titles),
                )
            )

    # Rank: most under-supported first; richer/longer claims break ties.
    suggestions.sort(
        key=lambda item: (
            item.needs_support_score,
            len(item.suggested_references),
            len(item.claim_text),
        ),
        reverse=True,
    )

    return {
        "claim_count": len(claims),
        "existing_reference_count": len(existing_references),
        "suggestion_count": len(suggestions),
        "suggestions": [item.to_dict() for item in suggestions],
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_claim_candidates(text: str, *, max_claims: int, min_claim_chars: int) -> list[ClaimCandidate]:
    """Segment up to *max_claims* claim candidates from the body of *text*.

    Everything after the first "References" heading is ignored. Claim-like
    sentences are greedily merged with qualifying continuation sentences
    before being scored.
    """
    # Only analyze the body; the reference list is handled separately.
    body = text.partition("References")[0] if "References" in text else text
    sentences = _prepare_sentences(body)
    claims: list[ClaimCandidate] = []
    index = 0
    while index < len(sentences):
        current = sentences[index]
        if not _is_claim_like(current, min_claim_chars=min_claim_chars):
            index += 1
            continue
        # Greedily absorb continuation sentences into the same claim;
        # merging is decided against the most recently absorbed part.
        parts = [current]
        index += 1
        while index < len(sentences) and _should_merge_continuation(parts[-1], sentences[index], min_claim_chars=min_claim_chars):
            parts.append(sentences[index])
            index += 1
        claim_text = " ".join(parts).strip()
        # Merged text can still be too short; index already advanced, so
        # this simply drops the candidate.
        if len(claim_text) < min_claim_chars:
            continue
        claims.append(
            ClaimCandidate(
                text=claim_text,
                citation_markers=_extract_citation_markers(claim_text),
                needs_support_score=_score_claim_need(claim_text),
            )
        )
        if len(claims) >= max_claims:
            break
    return claims
|
||||||
|
|
||||||
|
|
||||||
|
def _prepare_sentences(body: str) -> list[str]:
    """Split *body* into candidate sentences, dropping obvious non-claims.

    Strips the claim-marker glyph, collapses whitespace, then filters out
    empty strings, long ALL-CAPS runs (banners/headings), known non-claim
    section openers, and numbered section headers.
    """
    # Fix: the original chained .replace(CLAIM_MARKER, " ").replace("✅", " ")
    # — but CLAIM_MARKER *is* "✅", so the second call was a redundant
    # duplicate and has been removed (behavior unchanged).
    cleaned_body = re.sub(r"\s+", " ", body.replace(CLAIM_MARKER, " "))
    sentences: list[str] = []
    for sentence in SENTENCE_SPLIT_PATTERN.split(cleaned_body):
        cleaned = sentence.strip()
        if not cleaned:
            continue
        # All-caps text longer than 24 chars is treated as a heading.
        if cleaned.upper() == cleaned and len(cleaned) > 24:
            continue
        # Skip boilerplate openers and "IV." / "A."-style section headers.
        if NON_CLAIM_START_PATTERN.match(cleaned):
            continue
        if SECTION_HEADER_PATTERN.match(cleaned):
            continue
        sentences.append(cleaned)
    return sentences
|
||||||
|
|
||||||
|
|
||||||
|
def _is_claim_like(sentence: str, *, min_claim_chars: int) -> bool:
    """Heuristically decide whether *sentence* reads like a citable claim.

    A sentence qualifies if it carries any inline citation form, or if it
    contains claim-signal vocabulary and is long (or comma-structured)
    enough. Reference-list entries and short fragments never qualify.
    """
    length = len(sentence)
    # Reject short fragments and reference-list entries outright.
    if length < max(45, min_claim_chars // 2) or sentence.startswith("[["):
        return False
    # Any inline citation form marks the sentence as claim-bearing.
    cited = (
        NUMERIC_CITATION_PATTERN.search(sentence)
        or AUTHOR_YEAR_PAREN_PATTERN.search(sentence)
        or AUTHOR_YEAR_INLINE_PATTERN.search(sentence)
    )
    if cited:
        return True
    # Otherwise require signal vocabulary plus enough length or a comma.
    if CLAIM_SIGNAL_PATTERN.search(sentence):
        return length >= min_claim_chars or "," in sentence
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _should_merge_continuation(current: str, next_sentence: str, *, min_claim_chars: int) -> bool:
    """Decide whether *next_sentence* continues the claim ending at *current*.

    Ordered cascade: hard length cap first, then the next sentence must
    itself look claim-like (at a relaxed threshold), then a continuation
    opener or a citation-distribution heuristic triggers the merge.
    """
    # Stop growing once the accumulated part is already very long.
    if len(current) >= max(min_claim_chars * 3, 320):
        return False
    # Only merge sentences that are themselves claim-like (relaxed bar).
    if not _is_claim_like(next_sentence, min_claim_chars=max(45, min_claim_chars // 2)):
        return False
    # Openers like "However, ..." or "Thus ..." continue the thought.
    if CONTINUATION_START_PATTERN.match(next_sentence):
        return True
    current_markers = _extract_citation_markers(current)
    next_markers = _extract_citation_markers(next_sentence)
    # An uncited sentence followed by its citations belongs together.
    if next_markers and not current_markers:
        return True
    # A cited sentence absorbs a short follow-on elaboration.
    if current_markers and len(next_sentence) < max(min_claim_chars, 180):
        return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_existing_references(text: str) -> dict[str, str]:
    """Map reference-list markers (e.g. "3") to their raw entry text.

    Only ``[[n]] ...`` lines after the first "References" heading are
    captured; if a marker repeats, the last occurrence wins.
    """
    if "References" not in text:
        return {}
    tail = text.partition("References")[2]
    return {
        match.group(1): match.group(2).strip()
        for match in REFERENCE_ENTRY_PATTERN.finditer(tail)
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_citation_markers(text: str) -> list[str]:
    """Collect unique inline citation markers from *text* in first-seen order.

    Numeric citations yield the bare number ("7"); parenthetical and
    narrative author-year citations keep a parenthesized rendering.
    """
    ordered: list[str] = []
    seen: set[str] = set()

    def record(marker: str) -> None:
        # Deduplicate across all three pattern passes, preserving order.
        if marker not in seen:
            seen.add(marker)
            ordered.append(marker)

    for match in NUMERIC_CITATION_PATTERN.finditer(text):
        record(match.group(1))
    for match in AUTHOR_YEAR_PAREN_PATTERN.finditer(text):
        record(f"({match.group(1)})")
    for match in AUTHOR_YEAR_INLINE_PATTERN.finditer(text):
        record(f"{match.group(1)} ({match.group(2)})")
    return ordered
|
||||||
|
|
||||||
|
|
||||||
|
def _score_claim_need(text: str) -> float:
    """Heuristically score how much *text* needs additional citation support."""
    citation_markers = _extract_citation_markers(text)
    total = 0.0

    # Uncited claims get the strongest boost; cited ones decay with marker count.
    if citation_markers:
        total += max(0.25, 1.5 - min(len(citation_markers), 3) * 0.35)
        if any(marker.isdigit() for marker in citation_markers):
            total += 0.35
    else:
        total += 3.0

    # Longer sentences tend to carry more substantive claims; first bracket wins.
    for threshold, bonus in ((220, 1.25), (140, 0.85), (90, 0.45)):
        if len(text) >= threshold:
            total += bonus
            break

    # Capped reward for claim-signal pattern hits.
    signal_hits = len(CLAIM_SIGNAL_PATTERN.findall(text))
    total += min(signal_hits, 6) * 0.25

    if "," in text:
        total += 0.2
    lowered = text.lower()
    assertive_tokens = ("suggest", "indicate", "show", "demonstrate", "require", "because")
    if any(token in lowered for token in assertive_tokens):
        total += 0.3

    return total
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_title(value: str) -> str:
|
||||||
|
return re.sub(r"[^a-z0-9]+", " ", value.lower()).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _build_note(markers: list[str], titles: list[str]) -> str | None:
    """Compose a human-readable note summarizing a claim's existing citations."""
    if not markers:
        return "No existing inline citation markers detected for this claim."
    rendered_markers = ", ".join(_render_marker(marker) for marker in markers)
    if titles:
        return f"Existing citations detected: {rendered_markers}."
    # Markers were found inline but nothing matched the parsed reference list.
    return (
        f"Inline citation markers detected ({rendered_markers}), "
        "but no matching reference titles were parsed."
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _render_marker(marker: str) -> str:
|
||||||
|
if marker.isdigit():
|
||||||
|
return f"[{marker}]"
|
||||||
|
return marker
|
||||||
|
|
@ -10,6 +10,7 @@ from pathlib import Path
|
||||||
from .batch import BatchBootstrapRunner, load_batch_jobs
|
from .batch import BatchBootstrapRunner, load_batch_jobs
|
||||||
from .bibtex import BibEntry, parse_bibtex, render_bibtex
|
from .bibtex import BibEntry, parse_bibtex, render_bibtex
|
||||||
from .bootstrap import Bootstrapper
|
from .bootstrap import Bootstrapper
|
||||||
|
from .claim_support import analyze_support_gaps
|
||||||
from .examples.talkorigins import TalkOriginsScraper
|
from .examples.talkorigins import TalkOriginsScraper
|
||||||
from .expand import CrossrefExpander, OpenAlexExpander, TopicExpander, _expand_relation_types
|
from .expand import CrossrefExpander, OpenAlexExpander, TopicExpander, _expand_relation_types
|
||||||
from .notebook_export import export_notebook_topic_bundle
|
from .notebook_export import export_notebook_topic_bundle
|
||||||
|
|
@ -171,6 +172,22 @@ def build_parser() -> argparse.ArgumentParser:
|
||||||
)
|
)
|
||||||
verify_parser.add_argument("--output", help="Write verification results to a file instead of stdout")
|
verify_parser.add_argument("--output", help="Write verification results to a file instead of stdout")
|
||||||
|
|
||||||
|
support_claims_parser = subparsers.add_parser(
|
||||||
|
"support-claims",
|
||||||
|
help="Suggest additional supporting references for claim-like sentences in a text",
|
||||||
|
)
|
||||||
|
support_claims_parser.add_argument("input", help="Text file to analyze")
|
||||||
|
support_claims_parser.add_argument("--context", default="", help="Optional topic context used for scoring")
|
||||||
|
support_claims_parser.add_argument("--limit", type=int, default=5, help="Maximum candidates to inspect per claim")
|
||||||
|
support_claims_parser.add_argument("--max-claims", type=int, default=8, help="Maximum claim-like sentences to inspect")
|
||||||
|
support_claims_parser.add_argument(
|
||||||
|
"--min-claim-chars",
|
||||||
|
type=int,
|
||||||
|
default=90,
|
||||||
|
help="Minimum sentence length to consider as a claim candidate",
|
||||||
|
)
|
||||||
|
support_claims_parser.add_argument("--output", help="Write JSON results to a file instead of stdout")
|
||||||
|
|
||||||
resolve_parser = subparsers.add_parser("resolve", help="Enrich stored entries from external metadata sources")
|
resolve_parser = subparsers.add_parser("resolve", help="Enrich stored entries from external metadata sources")
|
||||||
resolve_parser.add_argument("citation_keys", nargs="+", help="Citation keys to enrich")
|
resolve_parser.add_argument("citation_keys", nargs="+", help="Citation keys to enrich")
|
||||||
|
|
||||||
|
|
@ -767,6 +784,15 @@ def main(argv: list[str] | None = None) -> int:
|
||||||
llm_provider=args.llm_provider,
|
llm_provider=args.llm_provider,
|
||||||
llm_role=args.llm_role,
|
llm_role=args.llm_role,
|
||||||
)
|
)
|
||||||
|
if args.command == "support-claims":
|
||||||
|
return _run_support_claims(
|
||||||
|
Path(args.input),
|
||||||
|
args.context,
|
||||||
|
args.limit,
|
||||||
|
args.max_claims,
|
||||||
|
args.min_claim_chars,
|
||||||
|
args.output,
|
||||||
|
)
|
||||||
if args.command == "resolve":
|
if args.command == "resolve":
|
||||||
return _run_resolve(store, args.citation_keys)
|
return _run_resolve(store, args.citation_keys)
|
||||||
if args.command == "enrich-oa":
|
if args.command == "enrich-oa":
|
||||||
|
|
@ -1217,6 +1243,32 @@ def _run_verify(
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def _run_support_claims(
    input_path: Path,
    context: str,
    limit: int,
    max_claims: int,
    min_claim_chars: int,
    output: str | None,
) -> int:
    """Run the claim-support analysis over *input_path* and emit JSON results.

    Results are written to *output* when provided (truthy), otherwise printed
    to stdout. Always returns exit code 0.
    """
    source_text = input_path.read_text(encoding="utf-8")
    payload = analyze_support_gaps(
        source_text,
        verifier=BibliographyVerifier(),
        context=context,
        limit=limit,
        max_claims=max_claims,
        min_claim_chars=min_claim_chars,
    )
    serialized = json.dumps(payload, indent=2)
    if output:
        Path(output).write_text(serialized + "\n", encoding="utf-8")
    else:
        print(serialized)
    return 0
|
||||||
|
|
||||||
|
|
||||||
def _print_progress(label: str, index: int, total: int, detail: str | None = None) -> None:
|
def _print_progress(label: str, index: int, total: int, detail: str | None = None) -> None:
|
||||||
message = f"[{index}/{total}] {label}"
|
message = f"[{index}/{total}] {label}"
|
||||||
if detail:
|
if detail:
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ from citegeist.app_api import LiteratureExplorerApi
|
||||||
from citegeist.bibtex import BibEntry
|
from citegeist.bibtex import BibEntry
|
||||||
from citegeist.bootstrap import BootstrapResult
|
from citegeist.bootstrap import BootstrapResult
|
||||||
from citegeist.expand import ExpansionResult
|
from citegeist.expand import ExpansionResult
|
||||||
|
from citegeist.verify import VerificationMatch, VerificationResult
|
||||||
|
|
||||||
|
|
||||||
class FakeBootstrapper:
|
class FakeBootstrapper:
|
||||||
|
|
@ -80,6 +81,38 @@ class FakeTopicExpander:
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class FakeVerifier:
    # Test double standing in for the real verifier in API-level tests.
    # Returns canned, deterministic results so support-claims suggestions
    # can be asserted exactly.

    def verify_strings(self, values, context="", limit=5):
        # Batch verification is not exercised by these tests; report nothing.
        return []

    def verify_string(self, value: str, context: str = "", limit: int = 5):
        # Always produce one high-confidence primary match plus one alternate.
        return VerificationResult(
            query=value,
            context=context,
            status="high_confidence",
            confidence=0.88,
            entry=BibEntry(
                entry_type="article",
                citation_key="support2024",
                fields={"title": "Support Paper", "year": "2024"},
            ),
            source_label="openalex:search:Support Paper",
            alternates=[
                VerificationMatch(
                    entry=BibEntry(
                        entry_type="article",
                        citation_key="alt2023",
                        fields={"title": "Alternate Support", "year": "2023"},
                    ),
                    score=0.66,
                    source_label="crossref:search:Alternate Support",
                )
            ],
            input_type="string",
            input_key=None,
        )
|
||||||
|
|
||||||
|
|
||||||
def test_literature_explorer_api_search_and_show_entry():
|
def test_literature_explorer_api_search_and_show_entry():
|
||||||
store = BibliographyStore()
|
store = BibliographyStore()
|
||||||
try:
|
try:
|
||||||
|
|
@ -119,6 +152,33 @@ def test_literature_explorer_api_capabilities_distinguish_metadata_and_expansion
|
||||||
assert payload["graph_expansion_sources"] == ["crossref", "openalex"]
|
assert payload["graph_expansion_sources"] == ["crossref", "openalex"]
|
||||||
assert payload["topic_expansion_sources"] == ["crossref", "openalex"]
|
assert payload["topic_expansion_sources"] == ["crossref", "openalex"]
|
||||||
assert payload["graph_relation_types"] == ["cites", "cited_by", "both"]
|
assert payload["graph_relation_types"] == ["cites", "cited_by", "both"]
|
||||||
|
assert "support_claims" in payload["operations"]
|
||||||
|
finally:
|
||||||
|
store.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_literature_explorer_api_support_claims_returns_suggestions():
    # End-to-end API check: a single cited claim plus a parsed reference list
    # should yield exactly one suggestion backed by the fake verifier's entry.
    store = BibliographyStore()
    try:
        api = LiteratureExplorerApi(store, verifier=FakeVerifier())
        payload = api.support_claims(
            """
Long claim text about agents evolving intelligent movement strategies in multiple computational settings without enough direct support [1].

References

[[1]]Earlier Cited Paper
"""
            ,
            context="artificial life",
            limit=3,
            max_claims=2,
            min_claim_chars=40,
        )

        assert payload["context"] == "artificial life"
        assert payload["suggestion_count"] == 1
        assert payload["suggestions"][0]["suggested_references"][0]["citation_key"] == "support2024"
    finally:
        # Always release the store's resources, even on assertion failure.
        store.close()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,116 @@
|
||||||
|
from citegeist.bibtex import BibEntry
|
||||||
|
from citegeist.claim_support import analyze_support_gaps
|
||||||
|
from citegeist.verify import VerificationMatch, VerificationResult
|
||||||
|
|
||||||
|
|
||||||
|
class FakeVerifier:
    # Deterministic stand-in for the bibliography verifier that also records
    # the queries it receives, so tests can assert what was looked up.

    def __init__(self) -> None:
        # Claim texts passed to verify_string, in call order.
        self.queries: list[str] = []

    def verify_string(self, value: str, context: str = "", limit: int = 5) -> VerificationResult:
        self.queries.append(value)
        # One strong new suggestion plus an alternate that duplicates an
        # already-cited reference, so filtering of existing titles is exercised.
        return VerificationResult(
            query=value,
            context=context,
            status="high_confidence",
            confidence=0.91,
            entry=BibEntry(
                entry_type="article",
                citation_key="new2020support",
                fields={
                    "title": "A Better Support Paper",
                    "author": "Smith, Jane",
                    "year": "2020",
                    "doi": "10.1000/new",
                    "journal": "Journal of Better Support",
                },
            ),
            source_label="openalex:search:A Better Support Paper",
            alternates=[
                VerificationMatch(
                    entry=BibEntry(
                        entry_type="article",
                        citation_key="cited1985",
                        fields={
                            "title": "Neural computation of decisions in optimization problems",
                            "author": "Hopfield, J. J. and Tank, D. W.",
                            "year": "1985",
                        },
                    ),
                    score=0.7,
                    source_label="crossref:search:Neural computation of decisions in optimization problems",
                )
            ],
            input_type="string",
            input_key=None,
        )
|
||||||
|
|
||||||
|
|
||||||
|
def test_analyze_support_gaps_filters_existing_reference_titles():
    # A suggested title matching an already-parsed reference must be filtered
    # out of the suggestion list.
    verifier = FakeVerifier()
    text = """
Computational research touching on movement of agents spans many different fields. Movement may not be modeled at all, but simply assigned a cost value, as in work in artificial neural systems applied to the traveling salesman problem [1].

References

[[1]]Neural computation of decisions in optimization problems
J. J. Hopfield, David W. Tank
"""
    payload = analyze_support_gaps(text, verifier=verifier, max_claims=3, min_claim_chars=40)
    assert payload["claim_count"] == 1
    assert payload["suggestion_count"] == 1
    suggestion = payload["suggestions"][0]
    assert suggestion["existing_citation_markers"] == ["1"]
    assert suggestion["existing_reference_titles"] == ["Neural computation of decisions in optimization problems"]
    assert suggestion["suggested_references"][0]["title"] == "A Better Support Paper"
    assert suggestion["needs_support_score"] > 0
    # The alternate candidate duplicates reference [1] and must be excluded.
    titles = [item["title"] for item in suggestion["suggested_references"]]
    assert "Neural computation of decisions in optimization problems" not in titles
|
||||||
|
|
||||||
|
|
||||||
|
def test_analyze_support_gaps_groups_adjacent_uncited_claim_sentences():
    # Two adjacent uncited claim sentences should merge into a single claim
    # and trigger exactly one verifier query for the merged text.
    verifier = FakeVerifier()
    text = """
Our research takes an approach at an intermediate level, seeking to elucidate how evolutionary processes can result in individual control of existing movement capabilities in order to intelligently exploit environmental resources. Instead, in looking at the evolution of intelligent behavior, our primary interest is in finding out by what means less capable agents give rise to those able to appropriately exploit prevailing conditions.
"""
    payload = analyze_support_gaps(text, verifier=verifier, max_claims=2, min_claim_chars=80)
    assert payload["claim_count"] == 1
    assert payload["suggestion_count"] == 1
    suggestion = payload["suggestions"][0]
    assert suggestion["existing_citation_markers"] == []
    assert "No existing inline citation markers detected" in suggestion["note"]
    # The second sentence must appear inside the merged claim text.
    assert "Instead, in looking at the evolution of intelligent behavior" in suggestion["claim_text"]
    assert suggestion["needs_support_score"] > 3.0
    assert len(verifier.queries) == 1
    assert verifier.queries[0] == suggestion["claim_text"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_analyze_support_gaps_detects_author_year_citation_forms():
    # Parenthesized author-year citations count as existing markers even when
    # no reference list entry matches them.
    verifier = FakeVerifier()
    text = """
Computational research touching on movement of agents spans many different fields. Given that a rich repertoire of behaviors in biological organisms concerns movement, exploring the use of movement by evolving agents can open up many research questions that are directly comparable to work within biological systems (Tang and Bennett 2010).
"""
    payload = analyze_support_gaps(text, verifier=verifier, max_claims=2, min_claim_chars=60)
    assert payload["claim_count"] == 1
    assert payload["suggestion_count"] == 1
    suggestion = payload["suggestions"][0]
    assert suggestion["existing_citation_markers"] == ["(Tang and Bennett 2010)"]
    assert suggestion["existing_reference_titles"] == []
    # With markers but no parsed titles, the note should say so explicitly.
    assert "no matching reference titles were parsed" in suggestion["note"].lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_analyze_support_gaps_ranks_less_cited_claims_first():
    # Suggestions must be ordered by need: the uncited claim should outrank
    # the claim that already carries citation [1].
    verifier = FakeVerifier()
    text = """
Movement may not be modeled at all, but simply assigned a cost value, as in work in artificial neural systems applied to the traveling salesman problem [1]. Our research takes an approach at an intermediate level, seeking to elucidate how evolutionary processes can result in individual control of existing movement capabilities in order to intelligently exploit environmental resources. Instead, in looking at the evolution of intelligent behavior, our primary interest is in finding out by what means less capable agents give rise to those able to appropriately exploit prevailing conditions.

References

[[1]]Neural computation of decisions in optimization problems
"""
    payload = analyze_support_gaps(text, verifier=verifier, max_claims=3, min_claim_chars=40)
    assert payload["suggestion_count"] == 2
    first, second = payload["suggestions"]
    assert first["existing_citation_markers"] == []
    assert second["existing_citation_markers"] == ["1"]
    assert first["needs_support_score"] > second["needs_support_score"]
|
||||||
|
|
@ -250,6 +250,54 @@ def test_cli_verify_bib_outputs_json(tmp_path: Path):
|
||||||
assert payload[0]["entry"]["citation_key"] == "candidate2024"
|
assert payload[0]["entry"]["citation_key"] == "candidate2024"
|
||||||
|
|
||||||
|
|
||||||
|
def test_cli_support_claims_outputs_json(tmp_path: Path):
    # CLI smoke test: the support-claims subcommand should print the
    # analyzer's payload as JSON on stdout and exit 0.
    input_path = tmp_path / "claims.txt"
    input_path.write_text(
        """
This is a long claim about digital organisms evolving intelligent movement strategies in open-ended environments [1].

References

[[1]]Existing cited paper
""",
        encoding="utf-8",
    )

    # Patch the analyzer so only CLI plumbing and JSON rendering are exercised.
    with patch("citegeist.cli.analyze_support_gaps") as mocked_analyze:
        mocked_analyze.return_value = {
            "claim_count": 1,
            "existing_reference_count": 1,
            "suggestion_count": 1,
            "suggestions": [
                {
                    "claim_text": "This is a long claim.",
                    "existing_citation_markers": ["1"],
                    "existing_reference_titles": ["Existing cited paper"],
                    "suggested_references": [{"citation_key": "support2024", "title": "Support Paper"}],
                    "note": None,
                }
            ],
        }

        stdout_buffer = io.StringIO()
        with redirect_stdout(stdout_buffer):
            exit_code = main(
                [
                    "--db",
                    str(tmp_path / "library.sqlite3"),
                    "support-claims",
                    str(input_path),
                    "--context",
                    "artificial life",
                ]
            )

    assert exit_code == 0
    # stdout must contain exactly the mocked payload, round-trippable as JSON.
    payload = json.loads(stdout_buffer.getvalue())
    assert payload["suggestion_count"] == 1
    assert payload["suggestions"][0]["suggested_references"][0]["citation_key"] == "support2024"
|
||||||
|
|
||||||
|
|
||||||
def test_cli_verify_rejects_incomplete_llm_config(tmp_path: Path):
|
def test_cli_verify_rejects_incomplete_llm_config(tmp_path: Path):
|
||||||
stderr_buffer = io.StringIO()
|
stderr_buffer = io.StringIO()
|
||||||
with redirect_stderr(stderr_buffer):
|
with redirect_stderr(stderr_buffer):
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,13 @@
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def test_literature_explorer_demo_exposes_claim_support_panel():
    # Guard that the demo UI ships the claim-support button, the output panel,
    # and the JS bridge call that backs them.
    root = Path(__file__).resolve().parents[1]
    html = (root / "examples" / "literature-explorer" / "index.html").read_text(encoding="utf-8")
    js = (root / "examples" / "literature-explorer" / "literature-explorer.js").read_text(encoding="utf-8")

    assert 'id="claim-support-button"' in html
    assert 'id="claim-support-output"' in html
    assert "Needs Support" in html
    assert "supportClaims(text, options = {})" in js
    assert 'bridge.call("support_claims"' in js
|
||||||
Loading…
Reference in New Issue