Add ranked claim support analysis and demo UI
This commit is contained in:
parent
89bc56a7aa
commit
5cde9e54a6
|
|
@ -51,6 +51,7 @@ The initial repo includes:
|
||||||
- staged plaintext reference extraction that now preserves more structured metadata from legacy references, including year suffixes, identifiers, volume/issue/pages, and thesis/report/web-style venue hints;
|
- staged plaintext reference extraction that now preserves more structured metadata from legacy references, including year suffixes, identifiers, volume/issue/pages, and thesis/report/web-style venue hints;
|
||||||
- a reference-extraction backend seam with the local `heuristic` parser as the default implementation, so optional external backends can be added later without changing the core extract workflow;
|
- a reference-extraction backend seam with the local `heuristic` parser as the default implementation, so optional external backends can be added later without changing the core extract workflow;
|
||||||
- standalone verification and disambiguation of free-text references or partial BibTeX into auditable BibTeX/JSON results with `x_status`, `x_confidence`, `x_source`, `x_query`, and alternate-candidate traces;
|
- standalone verification and disambiguation of free-text references or partial BibTeX into auditable BibTeX/JSON results with `x_status`, `x_confidence`, `x_source`, `x_query`, and alternate-candidate traces;
|
||||||
|
- a first-pass claim-support workflow that can scan citation-bearing claim sentences in a text excerpt and suggest additional supporting references not already parsed from the excerpt's reference list;
|
||||||
- identifier-first metadata resolution for DOI, PMID/PubMed, OpenAlex, DBLP, arXiv, and DataCite-backed entries, with OpenAlex/DataCite/PubMed title-search fallback;
|
- identifier-first metadata resolution for DOI, PMID/PubMed, OpenAlex, DBLP, arXiv, and DataCite-backed entries, with OpenAlex/DataCite/PubMed title-search fallback;
|
||||||
- local citation-graph traversal over stored `cites`, `cited_by`, and `crossref` edges;
|
- local citation-graph traversal over stored `cites`, `cited_by`, and `crossref` edges;
|
||||||
- Crossref- and OpenAlex-backed graph expansion that materializes draft related works and edge provenance;
|
- Crossref- and OpenAlex-backed graph expansion that materializes draft related works and edge provenance;
|
||||||
|
|
@ -174,6 +175,7 @@ PYTHONPATH=src .venv/bin/python -m citegeist compare-extract references.txt --ba
|
||||||
PYTHONPATH=src .venv/bin/python -m citegeist verify --string '"Graph-first bibliography augmentation" Smith 2024' --context "citation graphs" --format json
|
PYTHONPATH=src .venv/bin/python -m citegeist verify --string '"Graph-first bibliography augmentation" Smith 2024' --context "citation graphs" --format json
|
||||||
PYTHONPATH=src .venv/bin/python -m citegeist verify --string 'Evans 1960' --context "bottlenose dolphin echolocation" --llm --llm-base-url http://localhost:11434 --llm-model qwen3 --llm-role both --format json
|
PYTHONPATH=src .venv/bin/python -m citegeist verify --string 'Evans 1960' --context "bottlenose dolphin echolocation" --llm --llm-base-url http://localhost:11434 --llm-model qwen3 --llm-role both --format json
|
||||||
PYTHONPATH=src .venv/bin/python -m citegeist verify --bib draft.bib --output verified.bib
|
PYTHONPATH=src .venv/bin/python -m citegeist verify --bib draft.bib --output verified.bib
|
||||||
|
PYTHONPATH=src .venv/bin/python -m citegeist support-claims paper_excerpt.txt --context "artificial life"
|
||||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve smith2024graphs
|
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve smith2024graphs
|
||||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --preview --limit 25
|
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --preview --limit 25
|
||||||
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --all-misc --limit 25
|
PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 resolve-stubs --doi-only --all-misc --limit 25
|
||||||
|
|
|
||||||
|
|
@ -208,6 +208,15 @@ Exit criteria:
|
||||||
Status:
|
Status:
|
||||||
Early but serviceable. SQLite FTS covers the basic local-search path, but retrieval benchmarking, saved search workflows, and optional semantic ranking remain future work.
|
Early but serviceable. SQLite FTS covers the basic local-search path, but retrieval benchmarking, saved search workflows, and optional semantic ranking remain future work.
|
||||||
|
|
||||||
|
Note:
|
||||||
|
The new `support-claims` feature is an early bridge from bibliography work into
|
||||||
|
claim-oriented literature assistance. Its current scope is intentionally narrow:
|
||||||
|
segment citation-bearing claim sentences from a text excerpt, parse already
|
||||||
|
listed references when possible, and suggest additional candidate support using
|
||||||
|
the existing verifier/resolver stack. The next quality steps are better claim
|
||||||
|
segmentation, stronger deduping against already-used sources, and UI review
|
||||||
|
surfaces for per-claim suggestions.
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
The repository now has a small app-facing JSON adapter surface, a lightweight local HTTP bridge, and a static literature-explorer demo shell. That is enough for a browser or desktop-web shell to drive topic discovery, topic expansion, extraction, verification, entry inspection, and lightweight graph exploration against one local database. It is still a demo boundary rather than a full multi-user application or long-running service architecture.
|
The repository now has a small app-facing JSON adapter surface, a lightweight local HTTP bridge, and a static literature-explorer demo shell. That is enough for a browser or desktop-web shell to drive topic discovery, topic expansion, extraction, verification, entry inspection, and lightweight graph exploration against one local database. It is still a demo boundary rather than a full multi-user application or long-running service architecture.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -405,6 +405,62 @@
|
||||||
color: var(--ink);
|
color: var(--ink);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.claim-stack {
|
||||||
|
display: grid;
|
||||||
|
gap: 0.85rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.claim-card {
|
||||||
|
padding: 0.95rem 1rem;
|
||||||
|
border-radius: 18px;
|
||||||
|
background: rgba(255, 255, 255, 0.78);
|
||||||
|
border: 1px solid rgba(73, 57, 35, 0.11);
|
||||||
|
display: grid;
|
||||||
|
gap: 0.6rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.claim-score {
|
||||||
|
display: inline-flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.45rem;
|
||||||
|
font-size: 0.78rem;
|
||||||
|
font-weight: 700;
|
||||||
|
letter-spacing: 0.03em;
|
||||||
|
color: #6b230f;
|
||||||
|
background: #f4dfd3;
|
||||||
|
border: 1px solid rgba(141, 63, 45, 0.16);
|
||||||
|
border-radius: 999px;
|
||||||
|
padding: 0.3rem 0.62rem;
|
||||||
|
width: fit-content;
|
||||||
|
}
|
||||||
|
|
||||||
|
.claim-text {
|
||||||
|
color: var(--ink);
|
||||||
|
line-height: 1.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
.claim-note {
|
||||||
|
font-size: 0.88rem;
|
||||||
|
color: var(--muted);
|
||||||
|
}
|
||||||
|
|
||||||
|
.claim-ref-list {
|
||||||
|
display: grid;
|
||||||
|
gap: 0.55rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.claim-ref {
|
||||||
|
padding: 0.75rem 0.85rem;
|
||||||
|
border-radius: 14px;
|
||||||
|
background: rgba(245, 239, 229, 0.68);
|
||||||
|
border: 1px solid rgba(73, 57, 35, 0.09);
|
||||||
|
}
|
||||||
|
|
||||||
|
.claim-ref strong {
|
||||||
|
display: block;
|
||||||
|
margin-bottom: 0.15rem;
|
||||||
|
}
|
||||||
|
|
||||||
.endpoint-card {
|
.endpoint-card {
|
||||||
border-radius: 18px;
|
border-radius: 18px;
|
||||||
border: 1px solid rgba(73, 57, 35, 0.11);
|
border: 1px solid rgba(73, 57, 35, 0.11);
|
||||||
|
|
@ -641,6 +697,31 @@
|
||||||
<button id="verify-button" class="secondary">Verify String</button>
|
<button id="verify-button" class="secondary">Verify String</button>
|
||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
|
<section class="panel card">
|
||||||
|
<h2>Claim Support</h2>
|
||||||
|
<label>
|
||||||
|
Claim-Like Excerpt
|
||||||
|
<textarea id="claim-support-text">Computational research touching on movement of agents spans many different fields. Movement may not be modeled at all, but simply assigned a cost value, as in work in artificial neural systems applied to the traveling salesman problem [1]. Our research takes an approach at an intermediate level, seeking to elucidate how evolutionary processes can result in individual control of existing movement capabilities in order to intelligently exploit environmental resources.</textarea>
|
||||||
|
</label>
|
||||||
|
<div class="row-3">
|
||||||
|
<label>
|
||||||
|
Context
|
||||||
|
<input id="claim-support-context" value="artificial life" />
|
||||||
|
</label>
|
||||||
|
<label>
|
||||||
|
Max Claims
|
||||||
|
<input id="claim-support-max-claims" type="number" min="1" value="5" />
|
||||||
|
</label>
|
||||||
|
<label>
|
||||||
|
Min Claim Chars
|
||||||
|
<input id="claim-support-min-chars" type="number" min="20" value="80" />
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
<div class="toolbar">
|
||||||
|
<button id="claim-support-button" class="primary full">Suggest Support</button>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
</div>
|
</div>
|
||||||
</aside>
|
</aside>
|
||||||
|
|
||||||
|
|
@ -696,6 +777,11 @@
|
||||||
</section>
|
</section>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
|
<section class="panel card">
|
||||||
|
<h2>Claim Support Review</h2>
|
||||||
|
<div id="claim-support-output" class="empty">Run claim support to rank support-worthy assertions and inspect suggested references.</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
<section class="panel card">
|
<section class="panel card">
|
||||||
<h2>Graph View</h2>
|
<h2>Graph View</h2>
|
||||||
<div id="graph-output" class="empty">Load a topic to view a small local network around its first few entries.</div>
|
<div id="graph-output" class="empty">Load a topic to view a small local network around its first few entries.</div>
|
||||||
|
|
@ -796,6 +882,7 @@
|
||||||
searchResults: document.getElementById("search-results"),
|
searchResults: document.getElementById("search-results"),
|
||||||
graphOutput: document.getElementById("graph-output"),
|
graphOutput: document.getElementById("graph-output"),
|
||||||
extractVerifyOutput: document.getElementById("extract-verify-output"),
|
extractVerifyOutput: document.getElementById("extract-verify-output"),
|
||||||
|
claimSupportOutput: document.getElementById("claim-support-output"),
|
||||||
activityLog: document.getElementById("activity-log"),
|
activityLog: document.getElementById("activity-log"),
|
||||||
metricTopicCount: document.getElementById("metric-topic-count"),
|
metricTopicCount: document.getElementById("metric-topic-count"),
|
||||||
metricEntryCount: document.getElementById("metric-entry-count"),
|
metricEntryCount: document.getElementById("metric-entry-count"),
|
||||||
|
|
@ -835,6 +922,11 @@
|
||||||
extractText: document.getElementById("extract-text"),
|
extractText: document.getElementById("extract-text"),
|
||||||
extractButton: document.getElementById("extract-button"),
|
extractButton: document.getElementById("extract-button"),
|
||||||
verifyButton: document.getElementById("verify-button"),
|
verifyButton: document.getElementById("verify-button"),
|
||||||
|
claimSupportText: document.getElementById("claim-support-text"),
|
||||||
|
claimSupportContext: document.getElementById("claim-support-context"),
|
||||||
|
claimSupportMaxClaims: document.getElementById("claim-support-max-claims"),
|
||||||
|
claimSupportMinChars: document.getElementById("claim-support-min-chars"),
|
||||||
|
claimSupportButton: document.getElementById("claim-support-button"),
|
||||||
};
|
};
|
||||||
|
|
||||||
els.serverUrl.value = state.bridgeUrl;
|
els.serverUrl.value = state.bridgeUrl;
|
||||||
|
|
@ -1057,6 +1149,45 @@
|
||||||
els.extractVerifyOutput.textContent = JSON.stringify(payload, null, 2);
|
els.extractVerifyOutput.textContent = JSON.stringify(payload, null, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Render the claim-support analysis payload into the review panel.
// Expects payload.suggestions as produced by the `support_claims` bridge
// method: each suggestion carries claim_text, needs_support_score,
// existing_citation_markers, an optional note, and suggested_references.
// All user/model text is passed through escapeHtml before insertion.
function renderClaimSupport(payload) {
  const suggestions = payload?.suggestions || [];
  if (!suggestions.length) {
    renderEmpty(els.claimSupportOutput, "No ranked support suggestions yet. Try a longer excerpt or a different context phrase.");
    return;
  }
  // Swap the placeholder "empty" styling for the ranked-card layout.
  els.claimSupportOutput.className = "claim-stack";
  // NOTE: no comments inside the template literal — they would become markup.
  els.claimSupportOutput.innerHTML = `
    <div class="summary-box">
      <strong>Claim Support Summary</strong>
      <p>${suggestions.length} ranked claims from ${payload.claim_count || 0} extracted candidates · ${payload.existing_reference_count || 0} parsed existing references.</p>
      <p>Claims are ordered by <code>needs_support_score</code>, so uncited or under-supported assertions appear first.</p>
    </div>
    ${suggestions.map((suggestion) => `
      <div class="claim-card">
        <span class="claim-score">Needs Support ${Number(suggestion.needs_support_score ?? 0).toFixed(3)}</span>
        <div class="claim-text">${escapeHtml(suggestion.claim_text || "")}</div>
        <div class="pill-row">
          ${(suggestion.existing_citation_markers || []).map((marker) => `<span class="pill">${escapeHtml(marker)}</span>`).join("") || '<span class="pill">no inline citations detected</span>'}
        </div>
        ${suggestion.note ? `<div class="claim-note">${escapeHtml(suggestion.note)}</div>` : ""}
        <div class="claim-ref-list">
          ${(suggestion.suggested_references || []).map((reference) => `
            <div class="claim-ref">
              <strong>${escapeHtml(reference.title || reference.citation_key || "candidate")}</strong>
              <p>${escapeHtml(reference.authors || "Unknown authors")} · ${escapeHtml(reference.year || "n.d.")} · score ${Number(reference.score ?? 0).toFixed(3)}</p>
              <div class="pill-row">
                ${reference.journal ? `<span class="pill">${escapeHtml(reference.journal)}</span>` : ""}
                ${reference.doi ? `<span class="pill">${escapeHtml(reference.doi)}</span>` : ""}
                ${reference.source_label ? `<span class="pill">${escapeHtml(reference.source_label)}</span>` : ""}
              </div>
            </div>
          `).join("")}
        </div>
      </div>
    `).join("")}
  `;
}
|
||||||
|
|
||||||
function renderExpandSummary(payload) {
|
function renderExpandSummary(payload) {
|
||||||
if (!els.expandSummary) return;
|
if (!els.expandSummary) return;
|
||||||
const results = payload?.results || [];
|
const results = payload?.results || [];
|
||||||
|
|
@ -1320,6 +1451,29 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Click handler for the "Suggest Support" button: sends the excerpt and
// the form's tuning knobs to the bridge's `support_claims` method, then
// renders the ranked result. Requires an established client connection.
async function runClaimSupport() {
  if (!state.client) {
    setStatus("Connect to the server first.", "error");
    return;
  }
  // Disable the button while the request is in flight.
  setBusy(els.claimSupportButton, true);
  try {
    const payload = await state.client.supportClaims(els.claimSupportText.value, {
      context: els.claimSupportContext.value.trim(),
      // Per-claim suggestion limit is fixed here; only claim count and
      // minimum claim length are user-tunable from the form.
      limit: 5,
      max_claims: Number(els.claimSupportMaxClaims.value || 5),
      min_claim_chars: Number(els.claimSupportMinChars.value || 80),
    });
    renderClaimSupport(payload);
    setLastOp("support_claims");
    logActivity("support_claims", payload);
  } catch (error) {
    setStatus(String(error.message || error), "error");
  } finally {
    // Always re-enable the button, success or failure.
    setBusy(els.claimSupportButton, false);
  }
}
|
||||||
|
|
||||||
async function exportTopicBibtex(topicSlug) {
|
async function exportTopicBibtex(topicSlug) {
|
||||||
if (!state.client || !topicSlug) {
|
if (!state.client || !topicSlug) {
|
||||||
setStatus("Connect to the server first.", "error");
|
setStatus("Connect to the server first.", "error");
|
||||||
|
|
@ -1408,6 +1562,7 @@
|
||||||
els.searchButton.addEventListener("click", runSearch);
|
els.searchButton.addEventListener("click", runSearch);
|
||||||
els.extractButton.addEventListener("click", runExtract);
|
els.extractButton.addEventListener("click", runExtract);
|
||||||
els.verifyButton.addEventListener("click", runVerify);
|
els.verifyButton.addEventListener("click", runVerify);
|
||||||
|
els.claimSupportButton.addEventListener("click", runClaimSupport);
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,9 @@ export function createLiteratureExplorerClient(bridge) {
|
||||||
verifyStrings(values, options = {}) {
|
verifyStrings(values, options = {}) {
|
||||||
return bridge.call("verify_strings", { values, ...options });
|
return bridge.call("verify_strings", { values, ...options });
|
||||||
},
|
},
|
||||||
|
supportClaims(text, options = {}) {
|
||||||
|
return bridge.call("support_claims", { text, ...options });
|
||||||
|
},
|
||||||
verifyBibtex(bibtexText, options = {}) {
|
verifyBibtex(bibtexText, options = {}) {
|
||||||
return bridge.call("verify_bibtex", { bibtex_text: bibtexText, ...options });
|
return bridge.call("verify_bibtex", { bibtex_text: bibtexText, ...options });
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ from dataclasses import asdict
|
||||||
|
|
||||||
from .bibtex import BibEntry, parse_bibtex, render_bibtex
|
from .bibtex import BibEntry, parse_bibtex, render_bibtex
|
||||||
from .bootstrap import Bootstrapper
|
from .bootstrap import Bootstrapper
|
||||||
|
from .claim_support import analyze_support_gaps
|
||||||
from .expand import TopicExpander
|
from .expand import TopicExpander
|
||||||
from .extract import extract_references
|
from .extract import extract_references
|
||||||
from .storage import BibliographyStore
|
from .storage import BibliographyStore
|
||||||
|
|
@ -42,6 +43,7 @@ class LiteratureExplorerApi:
|
||||||
"expand_topic",
|
"expand_topic",
|
||||||
"extract_text",
|
"extract_text",
|
||||||
"verify_strings",
|
"verify_strings",
|
||||||
|
"support_claims",
|
||||||
"graph",
|
"graph",
|
||||||
],
|
],
|
||||||
"preview_operations": ["bootstrap", "expand_topic"],
|
"preview_operations": ["bootstrap", "expand_topic"],
|
||||||
|
|
@ -216,6 +218,26 @@ class LiteratureExplorerApi:
|
||||||
"results": [_verification_payload(result) for result in results],
|
"results": [_verification_payload(result) for result in results],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def support_claims(
    self,
    text: str,
    *,
    context: str = "",
    limit: int = 5,
    max_claims: int = 8,
    min_claim_chars: int = 90,
) -> dict[str, object]:
    """Run claim-support analysis over *text* and return a JSON-ready payload.

    Thin wrapper around :func:`analyze_support_gaps`, using this API's
    shared verifier instance. The supplied *context* string is echoed back
    into the payload so callers can correlate request and response.

    Args:
        text: Excerpt containing claim sentences and (optionally) a
            reference list.
        context: Topical hint forwarded to the verifier.
        limit: Per-claim cap on suggested references.
        max_claims: Maximum number of claims to extract.
        min_claim_chars: Minimum length for a merged claim to be kept.
    """
    payload = analyze_support_gaps(
        text,
        verifier=self.verifier,
        context=context,
        limit=limit,
        max_claims=max_claims,
        min_claim_chars=min_claim_chars,
    )
    # Echo the request context into the response for client-side display.
    payload["context"] = context
    return payload
|
||||||
|
|
||||||
def verify_bibtex(self, bibtex_text: str, *, context: str = "", limit: int = 5) -> dict[str, object]:
|
def verify_bibtex(self, bibtex_text: str, *, context: str = "", limit: int = 5) -> dict[str, object]:
|
||||||
entries = parse_bibtex(bibtex_text)
|
entries = parse_bibtex(bibtex_text)
|
||||||
results = [self.verifier.verify_bib_entry(entry, context=context, limit=limit) for entry in entries]
|
results = [self.verifier.verify_bib_entry(entry, context=context, limit=limit) for entry in entries]
|
||||||
|
|
|
||||||
|
|
@ -94,6 +94,14 @@ class LiteratureExplorerAppServer:
|
||||||
context=str(params.get("context") or ""),
|
context=str(params.get("context") or ""),
|
||||||
limit=int(params.get("limit", 5)),
|
limit=int(params.get("limit", 5)),
|
||||||
)
|
)
|
||||||
|
if method == "support_claims":
|
||||||
|
return self.api.support_claims(
|
||||||
|
str(params.get("text") or ""),
|
||||||
|
context=str(params.get("context") or ""),
|
||||||
|
limit=int(params.get("limit", 5)),
|
||||||
|
max_claims=int(params.get("max_claims", 8)),
|
||||||
|
min_claim_chars=int(params.get("min_claim_chars", 90)),
|
||||||
|
)
|
||||||
if method == "verify_bibtex":
|
if method == "verify_bibtex":
|
||||||
return self.api.verify_bibtex(
|
return self.api.verify_bibtex(
|
||||||
str(params.get("bibtex_text") or ""),
|
str(params.get("bibtex_text") or ""),
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,307 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .verify import BibliographyVerifier
|
||||||
|
|
||||||
|
|
||||||
|
# Glyph used to flag claims in annotated excerpts; stripped before segmentation.
CLAIM_MARKER = "✅"
# Numeric inline citation, e.g. "[12]" — captures the bare number "12".
NUMERIC_CITATION_PATTERN = re.compile(r"\[(\d+)\]")
# Parenthetical author-year citation, e.g. "(Smith and Jones, 2020a)".
AUTHOR_YEAR_PAREN_PATTERN = re.compile(
    r"\(([A-Z][A-Za-z'’.-]+(?:\s+(?:and|&|et al\.?))?(?:\s+[A-Z][A-Za-z'’.-]+)*,?\s+\d{4}[a-z]?)\)"
)
# Narrative author-year citation, e.g. "Smith (2020)" — captures (author, year).
AUTHOR_YEAR_INLINE_PATTERN = re.compile(
    r"\b([A-Z][A-Za-z'’.-]+(?:\s+(?:and|&|et al\.?))?(?:\s+[A-Z][A-Za-z'’.-]+)*)\s*\((\d{4}[a-z]?)\)"
)
# Reference-list entry of the form "[[3]] Title ..." at the start of a line.
REFERENCE_ENTRY_PATTERN = re.compile(r"^\s*\[\[(\d+)\]\]\s*(.+)$", re.MULTILINE)
# Sentence boundary: terminal punctuation, whitespace, then an
# uppercase letter, digit, quote, or bracketed citation.
SENTENCE_SPLIT_PATTERN = re.compile(r'(?<=[.!?])\s+(?=[A-Z0-9"\[])')
# Numbered section headers such as "IV. Results" or "A. Methods".
SECTION_HEADER_PATTERN = re.compile(r"^(?:[IVX]+\.|[A-Z]\.)\s+[A-Z]")
# Openers that usually continue the preceding claim rather than start a new one.
CONTINUATION_START_PATTERN = re.compile(
    r"^(?:instead|rather|thus|therefore|however|moreover|further|furthermore|"
    r"because|given that|in most cases|for many purposes|these|this|such|it|they|"
    r"another|the same|that |those )",
    re.IGNORECASE,
)
# Vocabulary that signals an assertion worth citing. NOTE(review): this word
# list looks tuned to one demo domain (artificial life / agent movement) —
# consider generalizing before wider use.
CLAIM_SIGNAL_PATTERN = re.compile(
    r"\b(?:we|our|this|these|those|research|results?|findings?|analysis|approach|model(?:ing)?|"
    r"study|studies|work|movement|evolution(?:ary)?|agents?|organisms?|intelligence|behavior|"
    r"behaviour|environment(?:al)?|resource(?:s)?|strategy|strategies|generaliz(?:e|ation)|"
    r"suggest(?:s|ed)?|indicat(?:es|ed)|show(?:s|ed)?|demonstrat(?:e|es|ed)|permit(?:s|ted)?|"
    r"require(?:s|d)?|provide(?:s|d)?|span(?:s|ned)?|range(?:s|d)?|covers?|across|exploit(?:s|ed)?|"
    r"emerge(?:s|d)|evolved?|hypothesis|goal|question|capabilit(?:y|ies)|complex(?:ity)?|"
    r"resource peak|gradient ascent|optimal|random walk|turing-complete)\b",
    re.IGNORECASE,
)
# Line starts that mark headings or UI boilerplate — never claims.
NON_CLAIM_START_PATTERN = re.compile(
    r"^(?:abstract|introduction|methods|results|discussion|future work|conclusions?|references|"
    r"keywords?|fig\.|table\s|view\s+\d+|show\s+abstract|relevance:|optional|already cited|"
    r"new references found)",
    re.IGNORECASE,
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
class ClaimSupportSuggestion:
    """One ranked claim plus the candidate references suggested to support it."""

    # The (possibly merged) claim sentence(s) pulled from the excerpt.
    claim_text: str
    # Inline citation markers already present, e.g. "3" or "(Smith 2020)".
    existing_citation_markers: list[str]
    # Raw reference-list entries matched to those markers (may be empty).
    existing_reference_titles: list[str]
    # Serialized candidate entries proposed as additional support.
    suggested_references: list[dict[str, object]]
    # Higher means the claim looks more under-cited; see _score_claim_need.
    needs_support_score: float
    # Optional human-readable remark about the claim's existing citations.
    note: str | None = None

    def to_dict(self) -> dict[str, object]:
        """Return a JSON-serializable copy; list fields are shallow-copied
        and the score is rounded to three decimal places."""
        return {
            "claim_text": self.claim_text,
            "existing_citation_markers": list(self.existing_citation_markers),
            "existing_reference_titles": list(self.existing_reference_titles),
            "suggested_references": list(self.suggested_references),
            "needs_support_score": round(float(self.needs_support_score), 3),
            "note": self.note,
        }
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
class ClaimCandidate:
    """Intermediate claim record produced during sentence segmentation."""

    # Full claim text (one sentence, or several merged continuations).
    text: str
    # Inline citation markers found in the text, in first-seen order.
    citation_markers: list[str]
    # Heuristic under-citation score computed by _score_claim_need.
    needs_support_score: float
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_support_gaps(
    text: str,
    *,
    verifier: BibliographyVerifier | None = None,
    context: str = "",
    limit: int = 5,
    max_claims: int = 8,
    min_claim_chars: int = 90,
) -> dict[str, object]:
    """Scan *text* for claim sentences and suggest additional supporting references.

    Pipeline: parse the excerpt's reference list, segment claim candidates
    from the body, verify each claim's text against external sources via
    *verifier*, then keep only suggestions whose titles are not already in
    the reference list. Results are ranked most-under-supported first.

    Args:
        text: Excerpt, optionally followed by a "References" section.
        verifier: Verifier to query; a default one is built if omitted.
        context: Topical hint forwarded to the verifier.
        limit: Verifier result cap per claim.
        max_claims: Maximum claim candidates to process.
        min_claim_chars: Minimum merged-claim length to keep.

    Returns:
        Dict with claim_count, existing_reference_count, suggestion_count,
        and the serialized ranked suggestions.
    """
    verifier = verifier or BibliographyVerifier()
    existing_references = _extract_existing_references(text)
    # Normalized titles let us drop suggestions that merely re-find
    # references the excerpt already lists.
    existing_titles_normalized = {_normalize_title(title) for title in existing_references.values() if title}
    claims = _extract_claim_candidates(text, max_claims=max_claims, min_claim_chars=min_claim_chars)

    suggestions: list[ClaimSupportSuggestion] = []
    for claim in claims:
        # Titles the claim already cites, when its markers resolve to
        # parsed reference-list entries.
        referenced_titles = [
            existing_references[marker]
            for marker in claim.citation_markers
            if marker in existing_references and existing_references[marker]
        ]
        verification = verifier.verify_string(claim.text, context=context, limit=limit)
        # Flatten primary result + alternates into parallel candidate lists.
        candidates = [verification.entry, *[alt.entry for alt in verification.alternates]]
        sources = [verification.source_label, *[alt.source_label for alt in verification.alternates]]
        scores = [verification.confidence, *[alt.score for alt in verification.alternates]]

        rendered: list[dict[str, object]] = []
        seen_titles: set[str] = set()
        for entry, source_label, score in zip(candidates, sources, scores):
            title = str(entry.fields.get("title") or "").strip()
            normalized_title = _normalize_title(title)
            # Skip untitled hits, titles already in the excerpt's reference
            # list, and duplicates within this claim's own candidate set.
            if not title or normalized_title in existing_titles_normalized or normalized_title in seen_titles:
                continue
            seen_titles.add(normalized_title)
            rendered.append(
                {
                    "citation_key": entry.citation_key,
                    "entry_type": entry.entry_type,
                    "title": title,
                    "authors": str(entry.fields.get("author") or ""),
                    "year": str(entry.fields.get("year") or ""),
                    "doi": str(entry.fields.get("doi") or ""),
                    "journal": str(entry.fields.get("journal") or entry.fields.get("booktitle") or ""),
                    "source_label": source_label,
                    "score": round(float(score), 4),
                }
            )

        # Claims with no novel candidates are omitted entirely.
        if rendered:
            suggestions.append(
                ClaimSupportSuggestion(
                    claim_text=claim.text,
                    existing_citation_markers=claim.citation_markers,
                    existing_reference_titles=referenced_titles,
                    suggested_references=rendered,
                    needs_support_score=claim.needs_support_score,
                    note=_build_note(claim.citation_markers, referenced_titles),
                )
            )

    # Rank: most under-supported first; richer/longer claims break ties.
    suggestions.sort(
        key=lambda item: (
            item.needs_support_score,
            len(item.suggested_references),
            len(item.claim_text),
        ),
        reverse=True,
    )

    return {
        "claim_count": len(claims),
        "existing_reference_count": len(existing_references),
        "suggestion_count": len(suggestions),
        "suggestions": [item.to_dict() for item in suggestions],
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_claim_candidates(text: str, *, max_claims: int, min_claim_chars: int) -> list[ClaimCandidate]:
    """Segment up to *max_claims* claim candidates from the body of *text*.

    Everything after the first "References" heading is ignored. Claim-like
    sentences are greedily merged with qualifying continuation sentences
    before being scored.
    """
    # Only analyze the body; the reference list is handled separately.
    body = text.partition("References")[0] if "References" in text else text
    sentences = _prepare_sentences(body)
    claims: list[ClaimCandidate] = []
    index = 0
    while index < len(sentences):
        current = sentences[index]
        if not _is_claim_like(current, min_claim_chars=min_claim_chars):
            index += 1
            continue
        # Greedily absorb continuation sentences into the same claim;
        # merging is decided against the most recently absorbed part.
        parts = [current]
        index += 1
        while index < len(sentences) and _should_merge_continuation(parts[-1], sentences[index], min_claim_chars=min_claim_chars):
            parts.append(sentences[index])
            index += 1
        claim_text = " ".join(parts).strip()
        # Merged text can still be too short; index already advanced, so
        # this simply drops the candidate.
        if len(claim_text) < min_claim_chars:
            continue
        claims.append(
            ClaimCandidate(
                text=claim_text,
                citation_markers=_extract_citation_markers(claim_text),
                needs_support_score=_score_claim_need(claim_text),
            )
        )
        if len(claims) >= max_claims:
            break
    return claims
|
||||||
|
|
||||||
|
|
||||||
|
def _prepare_sentences(body: str) -> list[str]:
    """Split *body* into candidate sentences, dropping obvious non-claims.

    Strips the claim-marker glyph, collapses whitespace, then filters out
    empty strings, long ALL-CAPS runs (banners/headings), known non-claim
    section openers, and numbered section headers.
    """
    # Fix: the original chained .replace(CLAIM_MARKER, " ").replace("✅", " ")
    # — but CLAIM_MARKER *is* "✅", so the second call was a redundant
    # duplicate and has been removed (behavior unchanged).
    cleaned_body = re.sub(r"\s+", " ", body.replace(CLAIM_MARKER, " "))
    sentences: list[str] = []
    for sentence in SENTENCE_SPLIT_PATTERN.split(cleaned_body):
        cleaned = sentence.strip()
        if not cleaned:
            continue
        # All-caps text longer than 24 chars is treated as a heading.
        if cleaned.upper() == cleaned and len(cleaned) > 24:
            continue
        # Skip boilerplate openers and "IV." / "A."-style section headers.
        if NON_CLAIM_START_PATTERN.match(cleaned):
            continue
        if SECTION_HEADER_PATTERN.match(cleaned):
            continue
        sentences.append(cleaned)
    return sentences
|
||||||
|
|
||||||
|
|
||||||
|
def _is_claim_like(sentence: str, *, min_claim_chars: int) -> bool:
    """Heuristically decide whether *sentence* reads like a citable claim.

    A sentence qualifies if it carries any inline citation form, or if it
    contains claim-signal vocabulary and is long (or comma-structured)
    enough. Reference-list entries and short fragments never qualify.
    """
    length = len(sentence)
    # Reject short fragments and reference-list entries outright.
    if length < max(45, min_claim_chars // 2) or sentence.startswith("[["):
        return False
    # Any inline citation form marks the sentence as claim-bearing.
    cited = (
        NUMERIC_CITATION_PATTERN.search(sentence)
        or AUTHOR_YEAR_PAREN_PATTERN.search(sentence)
        or AUTHOR_YEAR_INLINE_PATTERN.search(sentence)
    )
    if cited:
        return True
    # Otherwise require signal vocabulary plus enough length or a comma.
    if CLAIM_SIGNAL_PATTERN.search(sentence):
        return length >= min_claim_chars or "," in sentence
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _should_merge_continuation(current: str, next_sentence: str, *, min_claim_chars: int) -> bool:
    """Decide whether *next_sentence* continues the claim ending at *current*.

    Ordered cascade: hard length cap first, then the next sentence must
    itself look claim-like (at a relaxed threshold), then a continuation
    opener or a citation-distribution heuristic triggers the merge.
    """
    # Stop growing once the accumulated part is already very long.
    if len(current) >= max(min_claim_chars * 3, 320):
        return False
    # Only merge sentences that are themselves claim-like (relaxed bar).
    if not _is_claim_like(next_sentence, min_claim_chars=max(45, min_claim_chars // 2)):
        return False
    # Openers like "However, ..." or "Thus ..." continue the thought.
    if CONTINUATION_START_PATTERN.match(next_sentence):
        return True
    current_markers = _extract_citation_markers(current)
    next_markers = _extract_citation_markers(next_sentence)
    # An uncited sentence followed by its citations belongs together.
    if next_markers and not current_markers:
        return True
    # A cited sentence absorbs a short follow-on elaboration.
    if current_markers and len(next_sentence) < max(min_claim_chars, 180):
        return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_existing_references(text: str) -> dict[str, str]:
    """Map reference-list markers (e.g. "3") to their raw entry text.

    Only ``[[n]] ...`` lines after the first "References" heading are
    captured; if a marker repeats, the last occurrence wins.
    """
    if "References" not in text:
        return {}
    tail = text.partition("References")[2]
    return {
        match.group(1): match.group(2).strip()
        for match in REFERENCE_ENTRY_PATTERN.finditer(tail)
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_citation_markers(text: str) -> list[str]:
    """Collect unique inline citation markers from *text* in first-seen order.

    Numeric citations yield the bare number ("7"); parenthetical and
    narrative author-year citations keep a parenthesized rendering.
    """
    ordered: list[str] = []
    seen: set[str] = set()

    def record(marker: str) -> None:
        # Deduplicate across all three pattern passes, preserving order.
        if marker not in seen:
            seen.add(marker)
            ordered.append(marker)

    for match in NUMERIC_CITATION_PATTERN.finditer(text):
        record(match.group(1))
    for match in AUTHOR_YEAR_PAREN_PATTERN.finditer(text):
        record(f"({match.group(1)})")
    for match in AUTHOR_YEAR_INLINE_PATTERN.finditer(text):
        record(f"{match.group(1)} ({match.group(2)})")
    return ordered
|
||||||
|
|
||||||
|
|
||||||
|
def _score_claim_need(text: str) -> float:
    """Heuristically score how much *text* needs additional citation support."""
    citation_markers = _extract_citation_markers(text)
    total = 0.0

    # Uncited claims get the strongest boost; cited ones decay with marker count.
    if citation_markers:
        total += max(0.25, 1.5 - min(len(citation_markers), 3) * 0.35)
        if any(marker.isdigit() for marker in citation_markers):
            total += 0.35
    else:
        total += 3.0

    # Longer sentences tend to carry more substantive claims; first bracket wins.
    for threshold, bonus in ((220, 1.25), (140, 0.85), (90, 0.45)):
        if len(text) >= threshold:
            total += bonus
            break

    # Capped reward for claim-signal pattern hits.
    signal_hits = len(CLAIM_SIGNAL_PATTERN.findall(text))
    total += min(signal_hits, 6) * 0.25

    if "," in text:
        total += 0.2
    lowered = text.lower()
    assertive_tokens = ("suggest", "indicate", "show", "demonstrate", "require", "because")
    if any(token in lowered for token in assertive_tokens):
        total += 0.3

    return total
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_title(value: str) -> str:
|
||||||
|
return re.sub(r"[^a-z0-9]+", " ", value.lower()).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _build_note(markers: list[str], titles: list[str]) -> str | None:
    """Compose a human-readable note summarizing a claim's existing citations."""
    if not markers:
        return "No existing inline citation markers detected for this claim."
    rendered_markers = ", ".join(_render_marker(marker) for marker in markers)
    if titles:
        return f"Existing citations detected: {rendered_markers}."
    # Markers were found inline but nothing matched the parsed reference list.
    return (
        f"Inline citation markers detected ({rendered_markers}), "
        "but no matching reference titles were parsed."
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _render_marker(marker: str) -> str:
|
||||||
|
if marker.isdigit():
|
||||||
|
return f"[{marker}]"
|
||||||
|
return marker
|
||||||
|
|
@ -10,6 +10,7 @@ from pathlib import Path
|
||||||
from .batch import BatchBootstrapRunner, load_batch_jobs
|
from .batch import BatchBootstrapRunner, load_batch_jobs
|
||||||
from .bibtex import BibEntry, parse_bibtex, render_bibtex
|
from .bibtex import BibEntry, parse_bibtex, render_bibtex
|
||||||
from .bootstrap import Bootstrapper
|
from .bootstrap import Bootstrapper
|
||||||
|
from .claim_support import analyze_support_gaps
|
||||||
from .examples.talkorigins import TalkOriginsScraper
|
from .examples.talkorigins import TalkOriginsScraper
|
||||||
from .expand import CrossrefExpander, OpenAlexExpander, TopicExpander, _expand_relation_types
|
from .expand import CrossrefExpander, OpenAlexExpander, TopicExpander, _expand_relation_types
|
||||||
from .notebook_export import export_notebook_topic_bundle
|
from .notebook_export import export_notebook_topic_bundle
|
||||||
|
|
@ -171,6 +172,22 @@ def build_parser() -> argparse.ArgumentParser:
|
||||||
)
|
)
|
||||||
verify_parser.add_argument("--output", help="Write verification results to a file instead of stdout")
|
verify_parser.add_argument("--output", help="Write verification results to a file instead of stdout")
|
||||||
|
|
||||||
|
support_claims_parser = subparsers.add_parser(
|
||||||
|
"support-claims",
|
||||||
|
help="Suggest additional supporting references for claim-like sentences in a text",
|
||||||
|
)
|
||||||
|
support_claims_parser.add_argument("input", help="Text file to analyze")
|
||||||
|
support_claims_parser.add_argument("--context", default="", help="Optional topic context used for scoring")
|
||||||
|
support_claims_parser.add_argument("--limit", type=int, default=5, help="Maximum candidates to inspect per claim")
|
||||||
|
support_claims_parser.add_argument("--max-claims", type=int, default=8, help="Maximum claim-like sentences to inspect")
|
||||||
|
support_claims_parser.add_argument(
|
||||||
|
"--min-claim-chars",
|
||||||
|
type=int,
|
||||||
|
default=90,
|
||||||
|
help="Minimum sentence length to consider as a claim candidate",
|
||||||
|
)
|
||||||
|
support_claims_parser.add_argument("--output", help="Write JSON results to a file instead of stdout")
|
||||||
|
|
||||||
resolve_parser = subparsers.add_parser("resolve", help="Enrich stored entries from external metadata sources")
|
resolve_parser = subparsers.add_parser("resolve", help="Enrich stored entries from external metadata sources")
|
||||||
resolve_parser.add_argument("citation_keys", nargs="+", help="Citation keys to enrich")
|
resolve_parser.add_argument("citation_keys", nargs="+", help="Citation keys to enrich")
|
||||||
|
|
||||||
|
|
@ -767,6 +784,15 @@ def main(argv: list[str] | None = None) -> int:
|
||||||
llm_provider=args.llm_provider,
|
llm_provider=args.llm_provider,
|
||||||
llm_role=args.llm_role,
|
llm_role=args.llm_role,
|
||||||
)
|
)
|
||||||
|
if args.command == "support-claims":
|
||||||
|
return _run_support_claims(
|
||||||
|
Path(args.input),
|
||||||
|
args.context,
|
||||||
|
args.limit,
|
||||||
|
args.max_claims,
|
||||||
|
args.min_claim_chars,
|
||||||
|
args.output,
|
||||||
|
)
|
||||||
if args.command == "resolve":
|
if args.command == "resolve":
|
||||||
return _run_resolve(store, args.citation_keys)
|
return _run_resolve(store, args.citation_keys)
|
||||||
if args.command == "enrich-oa":
|
if args.command == "enrich-oa":
|
||||||
|
|
@ -1217,6 +1243,32 @@ def _run_verify(
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def _run_support_claims(
    input_path: Path,
    context: str,
    limit: int,
    max_claims: int,
    min_claim_chars: int,
    output: str | None,
) -> int:
    """Run the claim-support analysis over *input_path* and emit JSON results.

    Results are written to *output* when provided (truthy), otherwise printed
    to stdout. Always returns exit code 0.
    """
    source_text = input_path.read_text(encoding="utf-8")
    payload = analyze_support_gaps(
        source_text,
        verifier=BibliographyVerifier(),
        context=context,
        limit=limit,
        max_claims=max_claims,
        min_claim_chars=min_claim_chars,
    )
    serialized = json.dumps(payload, indent=2)
    if output:
        Path(output).write_text(serialized + "\n", encoding="utf-8")
    else:
        print(serialized)
    return 0
|
||||||
|
|
||||||
|
|
||||||
def _print_progress(label: str, index: int, total: int, detail: str | None = None) -> None:
|
def _print_progress(label: str, index: int, total: int, detail: str | None = None) -> None:
|
||||||
message = f"[{index}/{total}] {label}"
|
message = f"[{index}/{total}] {label}"
|
||||||
if detail:
|
if detail:
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ from citegeist.app_api import LiteratureExplorerApi
|
||||||
from citegeist.bibtex import BibEntry
|
from citegeist.bibtex import BibEntry
|
||||||
from citegeist.bootstrap import BootstrapResult
|
from citegeist.bootstrap import BootstrapResult
|
||||||
from citegeist.expand import ExpansionResult
|
from citegeist.expand import ExpansionResult
|
||||||
|
from citegeist.verify import VerificationMatch, VerificationResult
|
||||||
|
|
||||||
|
|
||||||
class FakeBootstrapper:
|
class FakeBootstrapper:
|
||||||
|
|
@ -80,6 +81,38 @@ class FakeTopicExpander:
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class FakeVerifier:
    # Test double standing in for the real verifier in API-level tests.
    # Returns canned, deterministic results so support-claims suggestions
    # can be asserted exactly.

    def verify_strings(self, values, context="", limit=5):
        # Batch verification is not exercised by these tests; report nothing.
        return []

    def verify_string(self, value: str, context: str = "", limit: int = 5):
        # Always produce one high-confidence primary match plus one alternate.
        return VerificationResult(
            query=value,
            context=context,
            status="high_confidence",
            confidence=0.88,
            entry=BibEntry(
                entry_type="article",
                citation_key="support2024",
                fields={"title": "Support Paper", "year": "2024"},
            ),
            source_label="openalex:search:Support Paper",
            alternates=[
                VerificationMatch(
                    entry=BibEntry(
                        entry_type="article",
                        citation_key="alt2023",
                        fields={"title": "Alternate Support", "year": "2023"},
                    ),
                    score=0.66,
                    source_label="crossref:search:Alternate Support",
                )
            ],
            input_type="string",
            input_key=None,
        )
|
||||||
|
|
||||||
|
|
||||||
def test_literature_explorer_api_search_and_show_entry():
|
def test_literature_explorer_api_search_and_show_entry():
|
||||||
store = BibliographyStore()
|
store = BibliographyStore()
|
||||||
try:
|
try:
|
||||||
|
|
@ -119,6 +152,33 @@ def test_literature_explorer_api_capabilities_distinguish_metadata_and_expansion
|
||||||
assert payload["graph_expansion_sources"] == ["crossref", "openalex"]
|
assert payload["graph_expansion_sources"] == ["crossref", "openalex"]
|
||||||
assert payload["topic_expansion_sources"] == ["crossref", "openalex"]
|
assert payload["topic_expansion_sources"] == ["crossref", "openalex"]
|
||||||
assert payload["graph_relation_types"] == ["cites", "cited_by", "both"]
|
assert payload["graph_relation_types"] == ["cites", "cited_by", "both"]
|
||||||
|
assert "support_claims" in payload["operations"]
|
||||||
|
finally:
|
||||||
|
store.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_literature_explorer_api_support_claims_returns_suggestions():
    # End-to-end API check: a single cited claim plus a parsed reference list
    # should yield exactly one suggestion backed by the fake verifier's entry.
    store = BibliographyStore()
    try:
        api = LiteratureExplorerApi(store, verifier=FakeVerifier())
        payload = api.support_claims(
            """
Long claim text about agents evolving intelligent movement strategies in multiple computational settings without enough direct support [1].

References

[[1]]Earlier Cited Paper
"""
            ,
            context="artificial life",
            limit=3,
            max_claims=2,
            min_claim_chars=40,
        )

        assert payload["context"] == "artificial life"
        assert payload["suggestion_count"] == 1
        assert payload["suggestions"][0]["suggested_references"][0]["citation_key"] == "support2024"
    finally:
        # Always release the store's resources, even on assertion failure.
        store.close()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,116 @@
|
||||||
|
from citegeist.bibtex import BibEntry
|
||||||
|
from citegeist.claim_support import analyze_support_gaps
|
||||||
|
from citegeist.verify import VerificationMatch, VerificationResult
|
||||||
|
|
||||||
|
|
||||||
|
class FakeVerifier:
    # Deterministic stand-in for the bibliography verifier that also records
    # the queries it receives, so tests can assert what was looked up.

    def __init__(self) -> None:
        # Claim texts passed to verify_string, in call order.
        self.queries: list[str] = []

    def verify_string(self, value: str, context: str = "", limit: int = 5) -> VerificationResult:
        self.queries.append(value)
        # One strong new suggestion plus an alternate that duplicates an
        # already-cited reference, so filtering of existing titles is exercised.
        return VerificationResult(
            query=value,
            context=context,
            status="high_confidence",
            confidence=0.91,
            entry=BibEntry(
                entry_type="article",
                citation_key="new2020support",
                fields={
                    "title": "A Better Support Paper",
                    "author": "Smith, Jane",
                    "year": "2020",
                    "doi": "10.1000/new",
                    "journal": "Journal of Better Support",
                },
            ),
            source_label="openalex:search:A Better Support Paper",
            alternates=[
                VerificationMatch(
                    entry=BibEntry(
                        entry_type="article",
                        citation_key="cited1985",
                        fields={
                            "title": "Neural computation of decisions in optimization problems",
                            "author": "Hopfield, J. J. and Tank, D. W.",
                            "year": "1985",
                        },
                    ),
                    score=0.7,
                    source_label="crossref:search:Neural computation of decisions in optimization problems",
                )
            ],
            input_type="string",
            input_key=None,
        )
|
||||||
|
|
||||||
|
|
||||||
|
def test_analyze_support_gaps_filters_existing_reference_titles():
    # A suggested title matching an already-parsed reference must be filtered
    # out of the suggestion list.
    verifier = FakeVerifier()
    text = """
Computational research touching on movement of agents spans many different fields. Movement may not be modeled at all, but simply assigned a cost value, as in work in artificial neural systems applied to the traveling salesman problem [1].

References

[[1]]Neural computation of decisions in optimization problems
J. J. Hopfield, David W. Tank
"""
    payload = analyze_support_gaps(text, verifier=verifier, max_claims=3, min_claim_chars=40)
    assert payload["claim_count"] == 1
    assert payload["suggestion_count"] == 1
    suggestion = payload["suggestions"][0]
    assert suggestion["existing_citation_markers"] == ["1"]
    assert suggestion["existing_reference_titles"] == ["Neural computation of decisions in optimization problems"]
    assert suggestion["suggested_references"][0]["title"] == "A Better Support Paper"
    assert suggestion["needs_support_score"] > 0
    # The alternate candidate duplicates reference [1] and must be excluded.
    titles = [item["title"] for item in suggestion["suggested_references"]]
    assert "Neural computation of decisions in optimization problems" not in titles
|
||||||
|
|
||||||
|
|
||||||
|
def test_analyze_support_gaps_groups_adjacent_uncited_claim_sentences():
    # Two adjacent uncited claim sentences should merge into a single claim
    # and trigger exactly one verifier query for the merged text.
    verifier = FakeVerifier()
    text = """
Our research takes an approach at an intermediate level, seeking to elucidate how evolutionary processes can result in individual control of existing movement capabilities in order to intelligently exploit environmental resources. Instead, in looking at the evolution of intelligent behavior, our primary interest is in finding out by what means less capable agents give rise to those able to appropriately exploit prevailing conditions.
"""
    payload = analyze_support_gaps(text, verifier=verifier, max_claims=2, min_claim_chars=80)
    assert payload["claim_count"] == 1
    assert payload["suggestion_count"] == 1
    suggestion = payload["suggestions"][0]
    assert suggestion["existing_citation_markers"] == []
    assert "No existing inline citation markers detected" in suggestion["note"]
    # The second sentence must appear inside the merged claim text.
    assert "Instead, in looking at the evolution of intelligent behavior" in suggestion["claim_text"]
    assert suggestion["needs_support_score"] > 3.0
    assert len(verifier.queries) == 1
    assert verifier.queries[0] == suggestion["claim_text"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_analyze_support_gaps_detects_author_year_citation_forms():
    # Parenthesized author-year citations count as existing markers even when
    # no reference list entry matches them.
    verifier = FakeVerifier()
    text = """
Computational research touching on movement of agents spans many different fields. Given that a rich repertoire of behaviors in biological organisms concerns movement, exploring the use of movement by evolving agents can open up many research questions that are directly comparable to work within biological systems (Tang and Bennett 2010).
"""
    payload = analyze_support_gaps(text, verifier=verifier, max_claims=2, min_claim_chars=60)
    assert payload["claim_count"] == 1
    assert payload["suggestion_count"] == 1
    suggestion = payload["suggestions"][0]
    assert suggestion["existing_citation_markers"] == ["(Tang and Bennett 2010)"]
    assert suggestion["existing_reference_titles"] == []
    # With markers but no parsed titles, the note should say so explicitly.
    assert "no matching reference titles were parsed" in suggestion["note"].lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_analyze_support_gaps_ranks_less_cited_claims_first():
    # Suggestions must be ordered by need: the uncited claim should outrank
    # the claim that already carries citation [1].
    verifier = FakeVerifier()
    text = """
Movement may not be modeled at all, but simply assigned a cost value, as in work in artificial neural systems applied to the traveling salesman problem [1]. Our research takes an approach at an intermediate level, seeking to elucidate how evolutionary processes can result in individual control of existing movement capabilities in order to intelligently exploit environmental resources. Instead, in looking at the evolution of intelligent behavior, our primary interest is in finding out by what means less capable agents give rise to those able to appropriately exploit prevailing conditions.

References

[[1]]Neural computation of decisions in optimization problems
"""
    payload = analyze_support_gaps(text, verifier=verifier, max_claims=3, min_claim_chars=40)
    assert payload["suggestion_count"] == 2
    first, second = payload["suggestions"]
    assert first["existing_citation_markers"] == []
    assert second["existing_citation_markers"] == ["1"]
    assert first["needs_support_score"] > second["needs_support_score"]
|
||||||
|
|
@ -250,6 +250,54 @@ def test_cli_verify_bib_outputs_json(tmp_path: Path):
|
||||||
assert payload[0]["entry"]["citation_key"] == "candidate2024"
|
assert payload[0]["entry"]["citation_key"] == "candidate2024"
|
||||||
|
|
||||||
|
|
||||||
|
def test_cli_support_claims_outputs_json(tmp_path: Path):
    # CLI smoke test: the support-claims subcommand should print the
    # analyzer's payload as JSON on stdout and exit 0.
    input_path = tmp_path / "claims.txt"
    input_path.write_text(
        """
This is a long claim about digital organisms evolving intelligent movement strategies in open-ended environments [1].

References

[[1]]Existing cited paper
""",
        encoding="utf-8",
    )

    # Patch the analyzer so only CLI plumbing and JSON rendering are exercised.
    with patch("citegeist.cli.analyze_support_gaps") as mocked_analyze:
        mocked_analyze.return_value = {
            "claim_count": 1,
            "existing_reference_count": 1,
            "suggestion_count": 1,
            "suggestions": [
                {
                    "claim_text": "This is a long claim.",
                    "existing_citation_markers": ["1"],
                    "existing_reference_titles": ["Existing cited paper"],
                    "suggested_references": [{"citation_key": "support2024", "title": "Support Paper"}],
                    "note": None,
                }
            ],
        }

        stdout_buffer = io.StringIO()
        with redirect_stdout(stdout_buffer):
            exit_code = main(
                [
                    "--db",
                    str(tmp_path / "library.sqlite3"),
                    "support-claims",
                    str(input_path),
                    "--context",
                    "artificial life",
                ]
            )

    assert exit_code == 0
    # stdout must contain exactly the mocked payload, round-trippable as JSON.
    payload = json.loads(stdout_buffer.getvalue())
    assert payload["suggestion_count"] == 1
    assert payload["suggestions"][0]["suggested_references"][0]["citation_key"] == "support2024"
|
||||||
|
|
||||||
|
|
||||||
def test_cli_verify_rejects_incomplete_llm_config(tmp_path: Path):
|
def test_cli_verify_rejects_incomplete_llm_config(tmp_path: Path):
|
||||||
stderr_buffer = io.StringIO()
|
stderr_buffer = io.StringIO()
|
||||||
with redirect_stderr(stderr_buffer):
|
with redirect_stderr(stderr_buffer):
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,13 @@
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def test_literature_explorer_demo_exposes_claim_support_panel():
    # Guard that the demo UI ships the claim-support button, the output panel,
    # and the JS bridge call that backs them.
    root = Path(__file__).resolve().parents[1]
    html = (root / "examples" / "literature-explorer" / "index.html").read_text(encoding="utf-8")
    js = (root / "examples" / "literature-explorer" / "literature-explorer.js").read_text(encoding="utf-8")

    assert 'id="claim-support-button"' in html
    assert 'id="claim-support-output"' in html
    assert "Needs Support" in html
    assert "supportClaims(text, options = {})" in js
    assert 'bridge.call("support_claims"' in js
|
||||||
Loading…
Reference in New Issue