Added literature explorer example

parent 2459830b70
commit 7bdaf37c59

@@ -5,3 +5,4 @@ __pycache__/
*.pyc
*.egg-info/
library.sqlite3
ops/

@@ -0,0 +1,2 @@
abd358981999c08a4f777efa4f38f2d4990c42e3421342cf9e9b460bf9700e41

@@ -52,6 +52,8 @@
position: sticky;
top: 1rem;
align-self: start;
max-height: calc(100vh - 2rem);
overflow-y: auto;
}

.content {

@@ -191,6 +193,8 @@
.status.error { color: var(--danger); }
.status.ok { color: var(--accent-2); }

.status.note { color: #5d4716; }

.meta-grid {
display: grid;
gap: 0.5rem;

@@ -382,6 +386,61 @@
margin-top: 0.75rem;
}

.api-reference {
display: grid;
gap: 0.85rem;
}

.summary-box {
margin-top: 0.75rem;
padding: 0.8rem 0.9rem;
border-radius: 16px;
background: rgba(255, 255, 255, 0.74);
border: 1px solid rgba(73, 57, 35, 0.11);
display: grid;
gap: 0.35rem;
}

.summary-box strong {
color: var(--ink);
}

.endpoint-card {
border-radius: 18px;
border: 1px solid rgba(73, 57, 35, 0.11);
background: rgba(255, 255, 255, 0.74);
padding: 0.95rem 1rem;
display: grid;
gap: 0.55rem;
}

.endpoint-head {
display: flex;
gap: 0.6rem;
align-items: center;
flex-wrap: wrap;
}

.endpoint-method {
display: inline-flex;
align-items: center;
justify-content: center;
min-width: 4.4rem;
padding: 0.28rem 0.55rem;
border-radius: 999px;
background: var(--accent-2);
color: #f7fbfb;
font-size: 0.78rem;
font-weight: 700;
letter-spacing: 0.04em;
}

.endpoint-path {
font-family: "IBM Plex Mono", "SFMono-Regular", monospace;
font-size: 0.9rem;
color: var(--ink);
}

@media (max-width: 1080px) {
.shell { grid-template-columns: 1fr; }
.sidebar { position: static; }

@@ -403,11 +462,16 @@
<h2>Bridge</h2>
<label>
Server URL
<input id="server-url" value="http://127.0.0.1:8765" />
<input id="server-url" value="" />
</label>
<label>
API Token
<input id="api-token" type="password" value="" placeholder="Bearer token for /api access" />
</label>
<div class="toolbar">
<button id="connect-button" class="primary">Connect</button>
<button id="refresh-topics-button" class="tertiary">Refresh Topics</button>
<button id="api-reference-button" class="secondary">API Reference</button>
</div>
<div id="connect-status" class="status">Not connected.</div>
</section>

@@ -442,10 +506,47 @@
<input id="bootstrap-status" value="draft" />
</label>
</div>
<div class="row-3">
<label>
Expansion Mode
<select id="bootstrap-expansion-mode">
<option value="legacy">legacy</option>
<option value="cites">cites</option>
<option value="cited_by">cited_by</option>
<option value="both">both</option>
</select>
</label>
<label>
Rounds
<input id="bootstrap-expansion-rounds" type="number" min="1" value="3" />
</label>
<label>
Recent Years
<input id="bootstrap-recent-years" type="number" min="0" value="5" />
</label>
</div>
<div class="row-3">
<label>
Recent Target
<input id="bootstrap-target-recent" type="number" min="1" value="5" />
</label>
<label>
Max Expanded Entries
<input id="bootstrap-max-expanded-entries" type="number" min="1" value="100" />
</label>
<label>
Max Expand Seconds
<input id="bootstrap-max-expand-seconds" type="number" min="1" step="0.5" value="20" />
</label>
</div>
<div class="toolbar">
<button id="bootstrap-preview-button" class="secondary">Preview Bootstrap</button>
<button id="bootstrap-commit-button" class="primary">Commit Bootstrap</button>
</div>
<div id="bootstrap-summary" class="summary-box">
<strong>Bootstrap Policy</strong>
<p>Use graph-limited bootstrap when you want topic seeding and expansion in one pass. The same expansion policy applies to preview and commit.</p>
</div>
</section>

<section class="panel card">

@@ -471,6 +572,7 @@
<select id="expand-relation">
<option value="cites">cites</option>
<option value="cited_by">cited_by</option>
<option value="both">both</option>
</select>
</label>
<label>

@@ -488,10 +590,28 @@
<input id="expand-per-seed-limit" type="number" min="1" value="12" />
</label>
</div>
<div class="row-3">
<label>
Rounds
<input id="expand-rounds" type="number" min="1" value="3" />
</label>
<label>
Recent Years
<input id="expand-recent-years" type="number" min="0" value="5" />
</label>
<label>
Recent Target
<input id="expand-target-recent" type="number" min="1" value="10" />
</label>
</div>
<div class="toolbar">
<button id="expand-preview-button" class="secondary">Preview Expansion</button>
<button id="expand-commit-button" class="primary">Apply Expansion</button>
</div>
<div id="expand-summary" class="summary-box">
<strong>Expansion Policy</strong>
<p>Use <code>cites</code> to bias toward newer work, or <code>both</code> for broader graph growth. Recursive rounds stop once the recent-entry target is met.</p>
</div>
</section>

<section class="panel card">

@@ -579,14 +699,83 @@
<h2>Graph View</h2>
<div id="graph-output" class="empty">Load a topic to view a small local network around its first few entries.</div>
</section>

<section id="api-reference" class="panel card">
<h2>API Reference</h2>
<div class="api-reference">
<div class="endpoint-card">
<div class="endpoint-head">
<span class="endpoint-method">GET</span>
<span class="endpoint-path">/api/healthz</span>
</div>
<p>Health check endpoint. Does not require a bearer token.</p>
</div>
<div class="endpoint-card">
<div class="endpoint-head">
<span class="endpoint-method">GET</span>
<span class="endpoint-path">/api/capabilities</span>
</div>
<p>Returns the available operation names and preview-capable actions. Requires <code>Authorization: Bearer &lt;token&gt;</code>.</p>
</div>
<div class="endpoint-card">
<div class="endpoint-head">
<span class="endpoint-method">POST</span>
<span class="endpoint-path">/api/call</span>
</div>
<p>RPC-style API entry point for search, topic loading, graph traversal, extraction, verification, and topic BibTeX export.</p>
<div class="code-block">{
"method": "expand_topic",
"params": {
"topic_slug": "acraniates",
"relation_type": "cites",
"max_rounds": 3,
"recent_years": 5,
"target_recent_entries": 10
}
}</div>
</div>
<div class="endpoint-card">
<div class="endpoint-head">
<span class="endpoint-method">POST</span>
<span class="endpoint-path">/api/call bootstrap policy</span>
</div>
<p>Bootstrap also accepts expansion policy controls when you want bounded topic seeding plus graph growth in one step.</p>
<div class="code-block">{
"method": "bootstrap",
"params": {
"topic": "abiogenesis",
"topic_slug": "abiogenesis",
"expansion_mode": "cites",
"expansion_rounds": 3,
"recent_years": 5,
"target_recent_entries": 5,
"max_expanded_entries": 100,
"max_expand_seconds": 20
}
}</div>
</div>
<div class="endpoint-card">
<div class="endpoint-head">
<span class="endpoint-method">AUTH</span>
<span class="endpoint-path">Bearer token</span>
</div>
<p>Set the token in the sidebar once. The demo stores it in localStorage and attaches it to subsequent <code>/api/*</code> requests.</p>
</div>
</div>
</section>
</main>
</div>

<script type="module">
import { createHttpBridge, createLiteratureExplorerClient } from "./literature-explorer.js";

const DEFAULT_BRIDGE_URL = window.location.origin.startsWith("http")
? `${window.location.origin}/api`
: "http://127.0.0.1:8765";

const state = {
bridgeUrl: "http://127.0.0.1:8765",
bridgeUrl: localStorage.getItem("citegeist.bridgeUrl") || DEFAULT_BRIDGE_URL,
apiToken: localStorage.getItem("citegeist.apiToken") || "",
client: null,
topics: [],
activeTopic: null,
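
A minimal sketch of the call the page makes through its bridge, assuming the server from this commit runs locally on 127.0.0.1:8765 with a token configured and that the requests package is available (the bare server exposes /call; the /api prefix in the cards above applies when the page is served behind the same origin):

import requests

token = "local-dev-token"  # hypothetical token value
payload = {
    "method": "expand_topic",
    "params": {
        "topic_slug": "acraniates",
        "relation_type": "cites",
        "max_rounds": 3,
        "recent_years": 5,
        "target_recent_entries": 10,
    },
}
# POST the RPC body with the bearer token, mirroring what createHttpBridge does.
response = requests.post(
    "http://127.0.0.1:8765/call",
    json=payload,
    headers={"Authorization": f"Bearer {token}"},
    timeout=60,
)
print(response.json())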

@@ -595,8 +784,10 @@

const els = {
serverUrl: document.getElementById("server-url"),
apiToken: document.getElementById("api-token"),
connectButton: document.getElementById("connect-button"),
refreshTopicsButton: document.getElementById("refresh-topics-button"),
apiReferenceButton: document.getElementById("api-reference-button"),
connectStatus: document.getElementById("connect-status"),
topicsList: document.getElementById("topics-list"),
topicView: document.getElementById("topic-view"),

@@ -614,8 +805,15 @@
bootstrapTopicLimit: document.getElementById("bootstrap-topic-limit"),
bootstrapCommitLimit: document.getElementById("bootstrap-commit-limit"),
bootstrapStatus: document.getElementById("bootstrap-status"),
bootstrapExpansionMode: document.getElementById("bootstrap-expansion-mode"),
bootstrapExpansionRounds: document.getElementById("bootstrap-expansion-rounds"),
bootstrapRecentYears: document.getElementById("bootstrap-recent-years"),
bootstrapTargetRecent: document.getElementById("bootstrap-target-recent"),
bootstrapMaxExpandedEntries: document.getElementById("bootstrap-max-expanded-entries"),
bootstrapMaxExpandSeconds: document.getElementById("bootstrap-max-expand-seconds"),
bootstrapPreviewButton: document.getElementById("bootstrap-preview-button"),
bootstrapCommitButton: document.getElementById("bootstrap-commit-button"),
bootstrapSummary: document.getElementById("bootstrap-summary"),
expandTopicSlug: document.getElementById("expand-topic-slug"),
expandTopicPhrase: document.getElementById("expand-topic-phrase"),
expandSource: document.getElementById("expand-source"),

@@ -623,8 +821,12 @@
expandMinRelevance: document.getElementById("expand-min-relevance"),
expandSeedLimit: document.getElementById("expand-seed-limit"),
expandPerSeedLimit: document.getElementById("expand-per-seed-limit"),
expandRounds: document.getElementById("expand-rounds"),
expandRecentYears: document.getElementById("expand-recent-years"),
expandTargetRecent: document.getElementById("expand-target-recent"),
expandPreviewButton: document.getElementById("expand-preview-button"),
expandCommitButton: document.getElementById("expand-commit-button"),
expandSummary: document.getElementById("expand-summary"),
searchQuery: document.getElementById("search-query"),
searchTopic: document.getElementById("search-topic"),
searchButton: document.getElementById("search-button"),

@@ -633,6 +835,9 @@
verifyButton: document.getElementById("verify-button"),
};

els.serverUrl.value = state.bridgeUrl;
els.apiToken.value = state.apiToken;

function setStatus(text, kind = "") {
els.connectStatus.textContent = text;
els.connectStatus.className = `status ${kind}`.trim();

@@ -706,6 +911,9 @@
${topic.expansion_phrase ? `<span class="pill">${escapeHtml(topic.expansion_phrase)}</span>` : ""}
${topic.source_url ? `<span class="pill">${escapeHtml(topic.source_url)}</span>` : ""}
</div>
<div class="toolbar">
<button type="button" class="secondary" data-export-topic="${escapeHtml(topic.slug)}">Export Topic BibTeX</button>
</div>
</div>
${entries.map((entry) => `
<div class="list-item">

@@ -724,6 +932,11 @@
await loadEntry(node.getAttribute("data-entry-key"));
});
});
els.topicView.querySelectorAll("[data-export-topic]").forEach((node) => {
node.addEventListener("click", async () => {
await exportTopicBibtex(node.getAttribute("data-export-topic"));
});
});
}

function renderEntry(entry) {

@@ -842,11 +1055,40 @@
els.extractVerifyOutput.textContent = JSON.stringify(payload, null, 2);
}

function renderExpandSummary(payload) {
if (!els.expandSummary) return;
const results = payload?.results || [];
const assigned = results.filter((item) => item.assigned_to_topic).length;
const runMeta = payload?.run_meta || {};
els.expandSummary.innerHTML = `
<strong>Expansion Summary</strong>
<p>${results.length} discoveries returned · ${assigned} assigned to topic · relation ${escapeHtml(els.expandRelation.value)} · rounds ${escapeHtml(els.expandRounds.value)}</p>
<p>Recent target: ${escapeHtml(els.expandTargetRecent.value)} within ${escapeHtml(els.expandRecentYears.value)} years. Stop reason: <strong>${escapeHtml(runMeta.stop_reason || "unknown")}</strong>.</p>
<p>Recent hits: ${escapeHtml(runMeta.recent_hits ?? 0)} · recent topic hits: ${escapeHtml(runMeta.recent_topic_hits ?? 0)}.</p>
`;
}

function renderBootstrapSummary(payload) {
if (!els.bootstrapSummary) return;
const results = payload?.results || [];
const created = results.filter((item) => item.created).length;
const runMeta = payload?.run_meta || {};
els.bootstrapSummary.innerHTML = `
<strong>Bootstrap Summary</strong>
<p>${results.length} candidate entries returned · ${created} newly created in this pass · mode ${escapeHtml(els.bootstrapExpansionMode.value)} · rounds ${escapeHtml(els.bootstrapExpansionRounds.value)}</p>
<p>Recent target: ${escapeHtml(els.bootstrapTargetRecent.value)} within ${escapeHtml(els.bootstrapRecentYears.value)} years · caps ${escapeHtml(els.bootstrapMaxExpandedEntries.value)} entries / ${escapeHtml(els.bootstrapMaxExpandSeconds.value)} seconds.</p>
<p>Stop reason: <strong>${escapeHtml(runMeta.stop_reason || "unknown")}</strong> · expanded discoveries: ${escapeHtml(runMeta.expanded_discoveries ?? 0)} · recent topic hits: ${escapeHtml(runMeta.recent_topic_hits ?? 0)}.</p>
`;
}

async function connect() {
setBusy(els.connectButton, true);
try {
state.bridgeUrl = els.serverUrl.value.trim() || state.bridgeUrl;
const bridge = createHttpBridge(state.bridgeUrl);
state.apiToken = els.apiToken.value.trim();
localStorage.setItem("citegeist.bridgeUrl", state.bridgeUrl);
localStorage.setItem("citegeist.apiToken", state.apiToken);
const bridge = createHttpBridge(state.bridgeUrl, { token: state.apiToken });
const client = createLiteratureExplorerClient(bridge);
const capabilities = await client.capabilities();
state.client = client;

@@ -919,10 +1161,17 @@
topic_limit: Number(els.bootstrapTopicLimit.value || 5),
topic_commit_limit: Number(els.bootstrapCommitLimit.value || 0) || null,
preview_only: previewOnly,
expand: false,
expand: els.bootstrapExpansionMode.value !== "legacy",
review_status: els.bootstrapStatus.value.trim() || "draft",
expansion_mode: els.bootstrapExpansionMode.value,
expansion_rounds: Number(els.bootstrapExpansionRounds.value || 1),
recent_years: Number(els.bootstrapRecentYears.value || 0) || null,
target_recent_entries: Number(els.bootstrapTargetRecent.value || 0) || null,
max_expanded_entries: Number(els.bootstrapMaxExpandedEntries.value || 0) || null,
max_expand_seconds: Number(els.bootstrapMaxExpandSeconds.value || 0) || null,
});
renderExtractVerify(payload);
renderBootstrapSummary(payload);
setLastOp(previewOnly ? "bootstrap_preview" : "bootstrap_commit");
logActivity(previewOnly ? "bootstrap_preview" : "bootstrap_commit", payload);
if (!previewOnly) {

@@ -954,9 +1203,13 @@
min_relevance: Number(els.expandMinRelevance.value || 0.2),
seed_limit: Number(els.expandSeedLimit.value || 10),
per_seed_limit: Number(els.expandPerSeedLimit.value || 12),
max_rounds: Number(els.expandRounds.value || 1),
recent_years: Number(els.expandRecentYears.value || 0),
target_recent_entries: Number(els.expandTargetRecent.value || 0) || null,
preview_only: previewOnly,
});
renderExtractVerify(payload);
renderExpandSummary(payload);
setLastOp(previewOnly ? "expand_preview" : "expand_commit");
logActivity(previewOnly ? "expand_preview" : "expand_commit", payload);
if (!previewOnly && topicSlug) {

@@ -1027,6 +1280,29 @@
}
}

async function exportTopicBibtex(topicSlug) {
if (!state.client || !topicSlug) {
setStatus("Connect to the server first.", "error");
return;
}
try {
const payload = await state.client.exportTopicBibtex(topicSlug, { include_stubs: false });
const filename = `${topicSlug}.bib`;
downloadText(filename, payload?.bibtex || "");
renderExtractVerify(payload);
setLastOp("export_topic_bibtex");
logActivity(`export_topic_bibtex:${topicSlug}`, payload);
const skippedCount = Array.isArray(payload?.skipped) ? payload.skipped.length : 0;
if (skippedCount) {
setStatus(`Exported ${filename} with ${skippedCount} skipped malformed entr${skippedCount === 1 ? "y" : "ies"}.`, "ok");
} else {
setStatus(`Exported ${filename}`, "ok");
}
} catch (error) {
setStatus(String(error.message || error), "error");
}
}

function escapeHtml(value) {
return String(value ?? "")
.replaceAll("&", "&amp;")

@@ -1068,8 +1344,23 @@
});
}

function downloadText(filename, text) {
const blob = new Blob([text], { type: "application/x-bibtex; charset=utf-8" });
const url = URL.createObjectURL(blob);
const anchor = document.createElement("a");
anchor.href = url;
anchor.download = filename;
document.body.appendChild(anchor);
anchor.click();
anchor.remove();
URL.revokeObjectURL(url);
}

els.connectButton.addEventListener("click", connect);
els.refreshTopicsButton.addEventListener("click", refreshTopics);
els.apiReferenceButton.addEventListener("click", () => {
document.getElementById("api-reference").scrollIntoView({ behavior: "smooth", block: "start" });
});
els.bootstrapPreviewButton.addEventListener("click", () => runBootstrap(true));
els.bootstrapCommitButton.addEventListener("click", () => runBootstrap(false));
els.expandPreviewButton.addEventListener("click", () => runExpand(true));

@@ -15,6 +15,9 @@ export function createLiteratureExplorerClient(bridge) {
getTopic(topicSlug, options = {}) {
return bridge.call("get_topic", { topic_slug: topicSlug, ...options });
},
exportTopicBibtex(topicSlug, options = {}) {
return bridge.call("export_topic_bibtex", { topic_slug: topicSlug, ...options });
},
bootstrap(options = {}) {
return bridge.call("bootstrap", options);
},

@@ -36,12 +39,23 @@ export function createLiteratureExplorerClient(bridge) {
};
}

export function createHttpBridge(baseUrl = "http://127.0.0.1:8765") {
function defaultApiBaseUrl() {
if (typeof window !== "undefined" && window.location?.origin) {
return `${window.location.origin}/api`;
}
return "http://127.0.0.1:8765";
}

export function createHttpBridge(baseUrl = defaultApiBaseUrl(), options = {}) {
const token = String(options.token || "").trim();
return {
async call(method, params = {}) {
const response = await fetch(`${baseUrl}/call`, {
method: "POST",
headers: { "Content-Type": "application/json" },
headers: {
"Content-Type": "application/json",
...(token ? { Authorization: `Bearer ${token}` } : {}),
},
body: JSON.stringify({ method, params }),
});
const payload = await response.json();

@@ -33,6 +33,7 @@ class LiteratureExplorerApi:
"show_entry",
"list_topics",
"get_topic",
"export_topic_bibtex",
"bootstrap",
"expand_topic",
"extract_text",

@@ -81,6 +82,22 @@ class LiteratureExplorerApi:
"entries": self.store.list_topic_entries(topic_slug, limit=entry_limit),
}

def export_topic_bibtex(self, topic_slug: str, *, include_stubs: bool = False) -> dict[str, object] | None:
topic = self.store.get_topic(topic_slug)
if topic is None:
return None
entries = self.store.list_topic_entries(topic_slug, limit=100000)
citation_keys = [row["citation_key"] for row in entries]
export = self.store.export_bibtex_report(citation_keys, include_stubs=include_stubs)
return {
"topic": topic,
"entry_count": len(citation_keys),
"exported_count": export["exported_count"],
"include_stubs": include_stubs,
"skipped": export["skipped"],
"bibtex": export["bibtex"],
}

def bootstrap(
self,
*,

@@ -94,6 +111,12 @@ class LiteratureExplorerApi:
expand: bool = True,
preview_only: bool = False,
review_status: str = "draft",
expansion_mode: str = "legacy",
expansion_rounds: int = 1,
recent_years: int | None = None,
target_recent_entries: int | None = None,
max_expanded_entries: int | None = None,
max_expand_seconds: float | None = None,
) -> dict[str, object]:
results = self.bootstrapper.bootstrap(
self.store,

@@ -107,6 +130,12 @@ class LiteratureExplorerApi:
topic_slug=topic_slug,
topic_name=topic_name,
topic_phrase=topic_phrase,
expansion_mode=expansion_mode,
expansion_rounds=expansion_rounds,
recent_years=recent_years,
target_recent_entries=target_recent_entries,
max_expanded_entries=max_expanded_entries,
max_expand_seconds=max_expand_seconds,
)
effective_slug = topic_slug
if effective_slug is None and topic:

@@ -114,6 +143,7 @@ class LiteratureExplorerApi:
payload: dict[str, object] = {
"preview": preview_only,
"results": [asdict(result) for result in results],
"run_meta": dict(getattr(self.bootstrapper, "last_run_meta", {}) or {}),
}
if effective_slug is not None:
payload["topic"] = self.store.get_topic(effective_slug)

@@ -132,6 +162,9 @@ class LiteratureExplorerApi:
min_relevance: float = 0.2,
seed_keys: list[str] | None = None,
preview_only: bool = False,
max_rounds: int = 1,
recent_years: int | None = None,
target_recent_entries: int | None = None,
) -> dict[str, object] | None:
topic = self.store.get_topic(topic_slug)
if topic is None:

@@ -147,12 +180,16 @@ class LiteratureExplorerApi:
min_relevance=min_relevance,
seed_keys=seed_keys,
preview_only=preview_only,
max_rounds=max_rounds,
recent_years=recent_years,
target_recent_entries=target_recent_entries,
)
return {
"topic": self.store.get_topic(topic_slug),
"preview": preview_only,
"results": [asdict(result) for result in results],
"entries": self.store.list_topic_entries(topic_slug, limit=200),
"run_meta": dict(getattr(self.topic_expander, "last_run_meta", {}) or {}),
}

def extract_text(self, text: str, *, backend: str = "heuristic") -> dict[str, object]:

@@ -1,9 +1,10 @@
from __future__ import annotations

from http import HTTPStatus
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from http.server import BaseHTTPRequestHandler, HTTPServer
import argparse
import json
import os
from pathlib import Path
from typing import Any

@@ -12,8 +13,9 @@ from .storage import BibliographyStore


class LiteratureExplorerAppServer:
def __init__(self, api: LiteratureExplorerApi) -> None:
def __init__(self, api: LiteratureExplorerApi, *, api_token: str | None = None) -> None:
self.api = api
self.api_token = (api_token or "").strip() or None

def dispatch(self, method: str, params: dict[str, Any] | None = None) -> Any:
params = params or {}

@@ -42,6 +44,11 @@ class LiteratureExplorerAppServer:
str(params.get("topic_slug") or ""),
entry_limit=int(params.get("entry_limit", 100)),
)
if method == "export_topic_bibtex":
return self.api.export_topic_bibtex(
str(params.get("topic_slug") or ""),
include_stubs=bool(params.get("include_stubs", False)),
)
if method == "bootstrap":
return self.api.bootstrap(
seed_bibtex=_optional_str(params.get("seed_bibtex")),

@@ -54,6 +61,12 @@ class LiteratureExplorerAppServer:
expand=bool(params.get("expand", True)),
preview_only=bool(params.get("preview_only", False)),
review_status=str(params.get("review_status") or "draft"),
expansion_mode=str(params.get("expansion_mode") or "legacy"),
expansion_rounds=int(params.get("expansion_rounds", 1)),
recent_years=_optional_int(params.get("recent_years")),
target_recent_entries=_optional_int(params.get("target_recent_entries")),
max_expanded_entries=_optional_int(params.get("max_expanded_entries")),
max_expand_seconds=_optional_float(params.get("max_expand_seconds")),
)
if method == "expand_topic":
return self.api.expand_topic(

@@ -66,6 +79,9 @@ class LiteratureExplorerAppServer:
min_relevance=float(params.get("min_relevance", 0.2)),
seed_keys=_string_list(params.get("seed_keys")),
preview_only=bool(params.get("preview_only", False)),
max_rounds=int(params.get("max_rounds", 1)),
recent_years=_optional_int(params.get("recent_years")),
target_recent_entries=_optional_int(params.get("target_recent_entries")),
)
if method == "extract_text":
return self.api.extract_text(

@@ -106,6 +122,9 @@ def create_request_handler(server: LiteratureExplorerAppServer):
if self.path != "/call":
self._write_json({"error": "not_found"}, status=HTTPStatus.NOT_FOUND)
return
if not _request_is_authorized(self.headers, server.api_token):
self._write_unauthorized()
return
try:
body = self.rfile.read(int(self.headers.get("Content-Length", "0") or "0"))
payload = json.loads(body.decode("utf-8") or "{}")

@@ -125,6 +144,9 @@ def create_request_handler(server: LiteratureExplorerAppServer):
self._write_json({"ok": True})
return
if self.path == "/capabilities":
if not _request_is_authorized(self.headers, server.api_token):
self._write_unauthorized()
return
self._write_json({"ok": True, "result": server.dispatch("capabilities", {})})
return
self._write_json({"error": "not_found"}, status=HTTPStatus.NOT_FOUND)

@@ -143,9 +165,19 @@ def create_request_handler(server: LiteratureExplorerAppServer):

def _write_cors_headers(self) -> None:
self.send_header("Access-Control-Allow-Origin", "*")
self.send_header("Access-Control-Allow-Headers", "Content-Type")
self.send_header("Access-Control-Allow-Headers", "Authorization, Content-Type, X-API-Token")
self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")

def _write_unauthorized(self) -> None:
body = json.dumps({"ok": False, "error": "unauthorized"}, indent=2).encode("utf-8")
self.send_response(HTTPStatus.UNAUTHORIZED)
self._write_cors_headers()
self.send_header("WWW-Authenticate", 'Bearer realm="citegeist"')
self.send_header("Content-Type", "application/json; charset=utf-8")
self.send_header("Content-Length", str(len(body)))
self.end_headers()
self.wfile.write(body)

return Handler

@@ -154,12 +186,14 @@ def main(argv: list[str] | None = None) -> int:
parser.add_argument("--db", default="library.sqlite3", help="SQLite database path")
parser.add_argument("--host", default="127.0.0.1", help="Bind host")
parser.add_argument("--port", type=int, default=8765, help="Bind port")
parser.add_argument("--api-token", default=None, help="Optional bearer token required for API access")
args = parser.parse_args(argv)

store = BibliographyStore(Path(args.db))
api = LiteratureExplorerApi(store)
server = LiteratureExplorerAppServer(api)
httpd = ThreadingHTTPServer((args.host, args.port), create_request_handler(server))
api_token = args.api_token or os.environ.get("CITEGEIST_API_TOKEN")
server = LiteratureExplorerAppServer(api, api_token=api_token)
httpd = HTTPServer((args.host, args.port), create_request_handler(server))
try:
print(f"CiteGeist explorer server listening on http://{args.host}:{args.port}")
httpd.serve_forever()
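
A minimal launch sketch for the token-protected server, assuming an import path for the module (the package name is not visible in this diff); the environment variable mirrors the fallback in main above:

import os

from citegeist.server import main  # assumed import path

# Used when --api-token is omitted, per the fallback above.
os.environ["CITEGEIST_API_TOKEN"] = "local-dev-token"  # hypothetical token
raise SystemExit(main(["--host", "127.0.0.1", "--port", "8765"]))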

@@ -182,6 +216,12 @@ def _optional_int(value: object) -> int | None:
return int(value)


def _optional_float(value: object) -> float | None:
if value is None or value == "":
return None
return float(value)


def _string_list(value: object) -> list[str]:
if value is None:
return []

@@ -190,5 +230,28 @@ def _string_list(value: object) -> list[str]:
return [str(value)] if str(value) else []


def _request_is_authorized(headers: Any, api_token: str | None) -> bool:
if not api_token:
return True
bearer_value = _extract_bearer_token(headers)
if bearer_value == api_token:
return True
header_token = headers.get("X-API-Token", "").strip() if headers else ""
return header_token == api_token


def _extract_bearer_token(headers: Any) -> str | None:
if not headers:
return None
authorization = headers.get("Authorization", "")
if not authorization:
return None
scheme, _, value = authorization.partition(" ")
if scheme.lower() != "bearer":
return None
value = value.strip()
return value or None


if __name__ == "__main__":
raise SystemExit(main())
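
A usage sketch of the two header forms the helpers above accept, assuming the requests package and a local server started with a matching token; the health path follows the API reference card and stays open:

import requests

TOKEN = "local-dev-token"  # hypothetical
BASE = "http://127.0.0.1:8765"

# Parsed by _extract_bearer_token:
requests.get(f"{BASE}/capabilities", headers={"Authorization": f"Bearer {TOKEN}"})
# Fallback header compared directly in _request_is_authorized:
requests.get(f"{BASE}/capabilities", headers={"X-API-Token": TOKEN})
# Health check needs no token:
requests.get(f"{BASE}/healthz")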

@@ -56,6 +56,12 @@ class BatchBootstrapRunner:
topic_slug = job.get("topic_slug")
topic_name = job.get("topic_name")
topic_phrase = job.get("topic_phrase")
expansion_mode = str(job.get("expansion_mode", "legacy"))
expansion_rounds = int(job.get("expansion_rounds", 1))
recent_years = job.get("recent_years")
target_recent_entries = job.get("target_recent_entries")
max_expanded_entries = job.get("max_expanded_entries")
max_expand_seconds = job.get("max_expand_seconds")

seed_bibtex = None
if seed_bib:

@@ -73,6 +79,12 @@ class BatchBootstrapRunner:
topic_slug=str(topic_slug) if topic_slug else None,
topic_name=str(topic_name) if topic_name else None,
topic_phrase=str(topic_phrase) if topic_phrase else None,
expansion_mode=expansion_mode,
expansion_rounds=expansion_rounds,
recent_years=int(recent_years) if recent_years is not None else None,
target_recent_entries=int(target_recent_entries) if target_recent_entries is not None else None,
max_expanded_entries=int(max_expanded_entries) if max_expanded_entries is not None else None,
max_expand_seconds=float(max_expand_seconds) if max_expand_seconds is not None else None,
)
results.append(BatchJobResult(name, len(job_results), job_results))
return results
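
A hypothetical job entry matching the keys BatchBootstrapRunner reads above; the on-disk format load_batch_jobs expects is not shown in this diff, so treat the shape as an assumption:

job = {
    "name": "abiogenesis-seed",  # assumed key, mirroring BatchJobResult(name, ...)
    "topic_slug": "abiogenesis",
    "topic_name": "Abiogenesis",
    "topic_phrase": "origin of life",
    "expansion_mode": "cites",
    "expansion_rounds": 3,
    "recent_years": 5,
    "target_recent_entries": 5,
    "max_expanded_entries": 100,
    "max_expand_seconds": 20,
}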

@@ -1,10 +1,19 @@
from __future__ import annotations

from dataclasses import dataclass
import random
import re
import time

from .bibtex import BibEntry, parse_bibtex
from .expand import CrossrefExpander, OpenAlexExpander
from .expand import (
CrossrefExpander,
OpenAlexExpander,
_entry_is_recent,
_expand_relation_types,
_meets_topic_assignment_threshold as _expand_meets_topic_assignment_threshold,
_topic_relevance_score as _expand_topic_relevance_score,
)
from .resolve import MetadataResolver
from .storage import BibliographyStore

@@ -31,6 +40,7 @@ class Bootstrapper:
self.resolver = resolver or MetadataResolver()
self.crossref_expander = crossref_expander or CrossrefExpander(self.resolver)
self.openalex_expander = openalex_expander or OpenAlexExpander(self.resolver)
self.last_run_meta: dict[str, object] = {}

def bootstrap(
self,

@@ -45,7 +55,25 @@
topic_slug: str | None = None,
topic_name: str | None = None,
topic_phrase: str | None = None,
expansion_mode: str = "legacy",
expansion_rounds: int = 1,
recent_years: int | None = None,
target_recent_entries: int | None = None,
max_expanded_entries: int | None = None,
max_expand_seconds: float | None = None,
) -> list[BootstrapResult]:
self.last_run_meta = {
"stop_reason": "completed",
"expansion_mode": expansion_mode,
"preview_only": preview_only,
"recent_years": recent_years,
"target_recent_entries": target_recent_entries,
"max_expanded_entries": max_expanded_entries,
"max_expand_seconds": max_expand_seconds,
"recent_hits": 0,
"recent_topic_hits": 0,
"expanded_discoveries": 0,
}
results: list[BootstrapResult] = []
seed_keys: list[str] = []
effective_topic_slug = topic_slug or (_slugify(topic) if topic else None)

@@ -140,15 +168,200 @@

if expand and not preview_only:
expanded_keys = list(dict.fromkeys(seed_keys))
for citation_key in expanded_keys:
for item in self.crossref_expander.expand_entry_references(store, citation_key):
results.append(BootstrapResult(item.discovered_citation_key, "crossref_expand", item.created_entry))
for item in self.openalex_expander.expand_entry(store, citation_key, relation_type="cites", limit=topic_limit):
results.append(BootstrapResult(item.discovered_citation_key, "openalex_expand", item.created_entry))
expanded_discoveries: set[str] = set()
deadline = time.monotonic() + max_expand_seconds if max_expand_seconds is not None else None
if expansion_mode == "legacy":
random.shuffle(expanded_keys)
for citation_key in expanded_keys:
if _deadline_reached(deadline):
store.connection.commit()
return results
for item in self.crossref_expander.expand_entry_references(store, citation_key):
results.append(BootstrapResult(item.discovered_citation_key, "crossref_expand", item.created_entry))
expanded_discoveries.add(item.discovered_citation_key)
if max_expanded_entries is not None and len(expanded_discoveries) >= max_expanded_entries:
self.last_run_meta.update({
"stop_reason": "max_expanded_entries",
"expanded_discoveries": len(expanded_discoveries),
})
store.connection.commit()
return results
if _deadline_reached(deadline):
self.last_run_meta.update({
"stop_reason": "max_expand_seconds",
"expanded_discoveries": len(expanded_discoveries),
})
store.connection.commit()
return results
for item in self.openalex_expander.expand_entry(
store,
citation_key,
relation_type="cites",
limit=topic_limit,
):
results.append(BootstrapResult(item.discovered_citation_key, "openalex_expand", item.created_entry))
expanded_discoveries.add(item.discovered_citation_key)
if max_expanded_entries is not None and len(expanded_discoveries) >= max_expanded_entries:
self.last_run_meta.update({
"stop_reason": "max_expanded_entries",
"expanded_discoveries": len(expanded_discoveries),
})
store.connection.commit()
return results
if _deadline_reached(deadline):
self.last_run_meta.update({
"stop_reason": "max_expand_seconds",
"expanded_discoveries": len(expanded_discoveries),
})
store.connection.commit()
return results
else:
results.extend(
self._bootstrap_openalex_expansion(
store,
expanded_keys,
relation_type=expansion_mode,
limit=topic_limit,
max_rounds=expansion_rounds,
topic_slug=effective_topic_slug,
topic_name=effective_topic_name,
topic_phrase=topic_phrase or topic,
recent_years=recent_years,
target_recent_entries=target_recent_entries,
max_expanded_entries=max_expanded_entries,
deadline=deadline,
)
)

self.last_run_meta.setdefault("stop_reason", "completed")
store.connection.commit()
return results

def _bootstrap_openalex_expansion(
self,
store: BibliographyStore,
seed_keys: list[str],
relation_type: str,
limit: int,
max_rounds: int,
topic_slug: str | None,
topic_name: str | None,
topic_phrase: str | None,
recent_years: int | None,
target_recent_entries: int | None,
max_expanded_entries: int | None,
deadline: float | None,
) -> list[BootstrapResult]:
results: list[BootstrapResult] = []
frontier = list(dict.fromkeys(seed_keys))
seen_seeds: set[str] = set()
recent_hits: set[str] = set()
recent_topic_hits: set[str] = set()
expanded_discoveries: set[str] = set()

for _round in range(max(1, max_rounds)):
if not frontier:
break
if _deadline_reached(deadline):
self.last_run_meta.update({
"stop_reason": "max_expand_seconds",
"recent_hits": len(recent_hits),
"recent_topic_hits": len(recent_topic_hits),
"expanded_discoveries": len(expanded_discoveries),
})
return results
next_frontier: list[str] = []
for citation_key in frontier:
if citation_key in seen_seeds:
continue
seen_seeds.add(citation_key)
if _deadline_reached(deadline):
self.last_run_meta.update({
"stop_reason": "max_expand_seconds",
"recent_hits": len(recent_hits),
"recent_topic_hits": len(recent_topic_hits),
"expanded_discoveries": len(expanded_discoveries),
})
return results
for relation_name in _expand_relation_types(relation_type):
if _deadline_reached(deadline):
self.last_run_meta.update({
"stop_reason": "max_expand_seconds",
"recent_hits": len(recent_hits),
"recent_topic_hits": len(recent_topic_hits),
"expanded_discoveries": len(expanded_discoveries),
})
return results
for item in self.openalex_expander.expand_entry(
store,
citation_key,
relation_type=relation_name,
limit=limit,
):
discovered_key = item.discovered_citation_key
entry = store.get_entry(discovered_key)
if _entry_is_recent(entry, recent_years):
recent_hits.add(discovered_key)
if topic_slug and topic_name and topic_phrase and entry is not None:
score = _expand_topic_relevance_score(topic_phrase, entry)
if _expand_meets_topic_assignment_threshold(
topic_phrase,
entry,
min_relevance=0.2,
relevance_score=score,
):
store.add_entry_topic(
discovered_key,
topic_slug=topic_slug,
topic_name=topic_name,
source_type="bootstrap_expand",
source_label=f"openalex:{relation_name}:{citation_key}",
confidence=score,
expansion_phrase=topic_phrase,
)
if _entry_is_recent(entry, recent_years) and score >= 0.5:
recent_topic_hits.add(discovered_key)
results.append(BootstrapResult(discovered_key, f"openalex_expand:{relation_name}", item.created_entry))
expanded_discoveries.add(discovered_key)
if discovered_key not in seen_seeds:
next_frontier.append(discovered_key)
if max_expanded_entries is not None and len(expanded_discoveries) >= max_expanded_entries:
self.last_run_meta.update({
"stop_reason": "max_expanded_entries",
"recent_hits": len(recent_hits),
"recent_topic_hits": len(recent_topic_hits),
"expanded_discoveries": len(expanded_discoveries),
})
return results
if target_recent_entries is not None and len(recent_topic_hits) >= target_recent_entries:
self.last_run_meta.update({
"stop_reason": "target_recent_entries",
"recent_hits": len(recent_hits),
"recent_topic_hits": len(recent_topic_hits),
"expanded_discoveries": len(expanded_discoveries),
})
return results
if _deadline_reached(deadline):
self.last_run_meta.update({
"stop_reason": "max_expand_seconds",
"recent_hits": len(recent_hits),
"recent_topic_hits": len(recent_topic_hits),
"expanded_discoveries": len(expanded_discoveries),
})
return results
frontier = list(dict.fromkeys(next_frontier))
self.last_run_meta.update({
"stop_reason": "frontier_exhausted",
"recent_hits": len(recent_hits),
"recent_topic_hits": len(recent_topic_hits),
"expanded_discoveries": len(expanded_discoveries),
})
return results


def _deadline_reached(deadline: float | None) -> bool:
return deadline is not None and time.monotonic() >= deadline

def _topic_candidates(self, topic: str, seed_keys: list[str], limit: int) -> list[tuple[BibEntry, float]]:
scored: dict[str, tuple[BibEntry, float]] = {}
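
In outline, the expansion above is a bounded frontier traversal with four exits, each recorded as a stop_reason; a condensed sketch with hypothetical helper callables (the seen-set dedup is omitted for brevity):

def sketch_rounds(frontier, expand_once, max_rounds, deadline_hit, cap_hit, target_hit):
    # Each round expands the current frontier once; any bound ends the run.
    for _ in range(max(1, max_rounds)):
        if not frontier:
            break
        next_frontier = []
        for key in frontier:
            if deadline_hit():
                return "max_expand_seconds"
            next_frontier.extend(expand_once(key))
            if cap_hit():
                return "max_expanded_entries"
            if target_hit():
                return "target_recent_entries"
        frontier = next_frontier
    return "frontier_exhausted"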

@@ -11,7 +11,7 @@ from .batch import BatchBootstrapRunner, load_batch_jobs
from .bibtex import parse_bibtex, render_bibtex
from .bootstrap import Bootstrapper
from .examples.talkorigins import TalkOriginsScraper
from .expand import CrossrefExpander, OpenAlexExpander, TopicExpander
from .expand import CrossrefExpander, OpenAlexExpander, TopicExpander, _expand_relation_types
from .extract import (
available_extraction_backends,
check_extraction_comparison_summary,

@@ -202,7 +202,7 @@
)
expand_parser.add_argument(
"--relation",
choices=["cites", "cited_by"],
choices=["cites", "cited_by", "both"],
default="cites",
help="Graph direction to expand for sources that support it",
)

@@ -225,12 +225,23 @@
)
expand_topic_parser.add_argument(
"--relation",
choices=["cites", "cited_by"],
choices=["cites", "cited_by", "both"],
default="cites",
help="Graph direction to expand for sources that support it",
)
expand_topic_parser.add_argument("--seed-limit", type=int, default=25, help="Maximum topic seed entries to expand from")
expand_topic_parser.add_argument("--per-seed-limit", type=int, default=25, help="Maximum discovered works to fetch per seed")
expand_topic_parser.add_argument("--rounds", type=int, default=1, help="Maximum recursive expansion rounds")
expand_topic_parser.add_argument(
"--recent-years",
type=int,
help="Treat discoveries within this many years of the current year as recent for termination heuristics",
)
expand_topic_parser.add_argument(
"--target-recent-entries",
type=int,
help="Stop recursive topic expansion once this many recent discoveries have been found",
)
expand_topic_parser.add_argument(
"--seed-key",
action="append",

@@ -300,6 +311,38 @@
action="store_true",
help="Do not run immediate graph expansion after seeding",
)
bootstrap_parser.add_argument(
"--expansion-mode",
choices=["legacy", "cites", "cited_by", "both"],
default="legacy",
help="Expansion policy after bootstrap seeding; legacy keeps Crossref refs plus OpenAlex cites",
)
bootstrap_parser.add_argument(
"--expansion-rounds",
type=int,
default=1,
help="Maximum recursive OpenAlex expansion rounds for non-legacy expansion modes",
)
bootstrap_parser.add_argument(
"--recent-years",
type=int,
help="Treat discoveries within this many years of the current year as recent for termination heuristics",
)
bootstrap_parser.add_argument(
"--target-recent-entries",
type=int,
help="Stop non-legacy expansion once this many recent discoveries have been found",
)
bootstrap_parser.add_argument(
"--max-expanded-entries",
type=int,
help="Hard cap on unique discovered entries added during one bootstrap job",
)
bootstrap_parser.add_argument(
"--max-expand-seconds",
type=float,
help="Wall-clock cap for one bootstrap job's expansion phase",
)
bootstrap_parser.add_argument(
"--preview",
action="store_true",

@@ -364,6 +407,38 @@
type=int,
help="Default bootstrap topic commit limit to include in generated jobs",
)
talkorigins_parser.add_argument(
"--expansion-mode",
choices=["legacy", "cites", "cited_by", "both"],
default="legacy",
help="Expansion policy to write into generated bootstrap jobs",
)
talkorigins_parser.add_argument(
"--expansion-rounds",
type=int,
default=1,
help="Maximum recursive OpenAlex expansion rounds to write into generated jobs",
)
talkorigins_parser.add_argument(
"--recent-years",
type=int,
help="Optional recent-discovery window to write into generated jobs",
)
talkorigins_parser.add_argument(
"--target-recent-entries",
type=int,
help="Optional recent-discovery target to write into generated jobs",
)
talkorigins_parser.add_argument(
"--max-expanded-entries",
type=int,
help="Optional hard cap on unique discovered entries per generated bootstrap job",
)
talkorigins_parser.add_argument(
"--max-expand-seconds",
type=float,
help="Optional wall-clock cap to write into generated bootstrap jobs",
)
talkorigins_parser.add_argument("--status", default="draft", help="Review status for generated seed jobs")

validate_talkorigins_parser = subparsers.add_parser(

@@ -637,6 +712,11 @@ def main(argv: list[str] | None = None) -> int:
args.min_relevance,
args.seed_keys,
args.preview,
args.rounds,
args.recent_years,
args.target_recent_entries,
args.max_expanded_entries,
args.max_expand_seconds,
)
if args.command == "set-topic-phrase":
return _run_set_topic_phrase(store, args.topic_slug, args.phrase, args.clear)

@@ -666,6 +746,10 @@
args.topic_slug,
args.topic_name,
args.store_topic_phrase,
args.expansion_mode,
args.expansion_rounds,
args.recent_years,
args.target_recent_entries,
)
if args.command == "bootstrap-batch":
return _run_bootstrap_batch(store, Path(args.input))

@@ -682,6 +766,12 @@
not args.no_resume,
args.topic_limit,
args.topic_commit_limit,
args.expansion_mode,
args.expansion_rounds,
args.recent_years,
args.target_recent_entries,
args.max_expanded_entries,
args.max_expand_seconds,
args.status,
)
if args.command in {"example-talkorigins-validate", "validate-talkorigins"}:

@@ -1387,7 +1477,11 @@ def _run_expand(
expand_fn = lambda key: expander.expand_entry_references(store, key)
elif source == "openalex":
expander = OpenAlexExpander()
expand_fn = lambda key: expander.expand_entry(store, key, relation_type=relation, limit=limit)
expand_fn = lambda key: [
item
for relation_name in _expand_relation_types(relation)
for item in expander.expand_entry(store, key, relation_type=relation_name, limit=limit)
]
else:
print(f"Unsupported expansion source: {source}", file=sys.stderr)
return 1

@@ -1412,6 +1506,9 @@ def _run_expand_topic(
min_relevance: float,
seed_keys: list[str] | None,
preview: bool,
rounds: int,
recent_years: int | None,
target_recent_entries: int | None,
) -> int:
expander = TopicExpander()
_print_phase(f"Loading topic expansion for {topic_slug}")

@@ -1430,6 +1527,9 @@
min_relevance=min_relevance,
seed_keys=seed_keys,
preview_only=preview,
max_rounds=rounds,
recent_years=recent_years,
target_recent_entries=target_recent_entries,
)
print(json.dumps([asdict(result) for result in results], indent=2))
return 0

@@ -1513,6 +1613,12 @@ def _run_bootstrap(
topic_slug: str | None,
topic_name: str | None,
stored_topic_phrase: str | None,
expansion_mode: str,
expansion_rounds: int,
recent_years: int | None,
target_recent_entries: int | None,
max_expanded_entries: int | None,
max_expand_seconds: float | None,
) -> int:
if not seed_bib and not topic:
print("bootstrap requires --seed-bib, --topic, or both", file=sys.stderr)

@@ -1533,6 +1639,12 @@
topic_slug=topic_slug,
topic_name=topic_name,
topic_phrase=stored_topic_phrase,
expansion_mode=expansion_mode,
expansion_rounds=expansion_rounds,
recent_years=recent_years,
target_recent_entries=target_recent_entries,
max_expanded_entries=max_expanded_entries,
max_expand_seconds=max_expand_seconds,
)
print(json.dumps([asdict(result) for result in results], indent=2))
return 0

@@ -1570,6 +1682,12 @@ def _run_scrape_talkorigins(
resume: bool,
topic_limit: int,
topic_commit_limit: int | None,
expansion_mode: str,
expansion_rounds: int,
recent_years: int | None,
target_recent_entries: int | None,
max_expanded_entries: int | None,
max_expand_seconds: float | None,
review_status: str,
) -> int:
scraper = TalkOriginsScraper()

@@ -1586,6 +1704,12 @@
resume=resume,
topic_limit=topic_limit,
topic_commit_limit=topic_commit_limit,
expansion_mode=expansion_mode,
expansion_rounds=expansion_rounds,
recent_years=recent_years,
target_recent_entries=target_recent_entries,
max_expanded_entries=max_expanded_entries,
max_expand_seconds=max_expand_seconds,
)
print(json.dumps(asdict(export), indent=2))
return 0
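
A hypothetical end-to-end invocation of the new bootstrap flags through the CLI entry point (import path assumed; the subcommand name and flag spellings come from the parser additions above):

from citegeist.cli import main  # assumed import path

main([
    "bootstrap",
    "--topic", "abiogenesis",
    "--expansion-mode", "cites",
    "--expansion-rounds", "3",
    "--recent-years", "5",
    "--target-recent-entries", "5",
    "--max-expanded-entries", "100",
    "--max-expand-seconds", "20",
])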

@@ -1,5 +1,6 @@
from __future__ import annotations

from datetime import date
import html
import re
from dataclasses import dataclass

@@ -180,6 +181,7 @@ class OpenAlexExpander:

if relation_type == "cites":
source_key = citation_key
target_key = target_key
else:
source_key = target_key
target_key = citation_key

@@ -194,8 +196,8 @@
)
results.append(
ExpansionResult(
source_citation_key=source_key,
discovered_citation_key=target_key,
source_citation_key=citation_key,
discovered_citation_key=existing_key or discovered.citation_key,
created_entry=created,
relation_type=relation_type,
source_label=f"openalex:{relation_type}:{openalex_id}",

@@ -225,6 +227,7 @@ class TopicExpander:
) -> None:
self.crossref_expander = crossref_expander or CrossrefExpander()
self.openalex_expander = openalex_expander or OpenAlexExpander()
self.last_run_meta: dict[str, object] = {}

def expand_topic(
self,

@@ -238,7 +241,21 @@
min_relevance: float = 0.2,
seed_keys: list[str] | None = None,
preview_only: bool = False,
max_rounds: int = 1,
recent_years: int | None = None,
target_recent_entries: int | None = None,
) -> list[TopicExpansionResult]:
self.last_run_meta = {
"stop_reason": "completed",
"preview_only": preview_only,
"relation_type": relation_type,
"source": source,
"max_rounds": max_rounds,
"recent_years": recent_years,
"target_recent_entries": target_recent_entries,
"recent_hits": 0,
"recent_topic_hits": 0,
}
topic = store.get_topic(topic_slug)
if topic is None:
return []
@@ -249,59 +266,89 @@ class TopicExpander:
            allowed = set(seed_keys)
            seeds = [seed for seed in seeds if str(seed["citation_key"]) in allowed]
        results: list[TopicExpansionResult] = []
        frontier = [str(seed["citation_key"]) for seed in seeds]
        seen_seed_keys: set[str] = set()
        recent_hits: set[str] = set()
        recent_topic_hits: set[str] = set()

        for seed in seeds:
            seed_key = str(seed["citation_key"])
            if preview_only:
                discovered_rows = self._preview_discoveries(
                    store,
                    seed_key,
                    source=source,
                    relation_type=relation_type,
                    limit=per_seed_limit,
                )
            else:
                discovered_rows = self._materialized_discoveries(
                    store,
                    seed_key,
                    source=source,
                    relation_type=relation_type,
                    limit=per_seed_limit,
                )

            for row, target_entry in discovered_rows:
                score = _topic_relevance_score(phrase, target_entry)
                meets_threshold = _meets_topic_assignment_threshold(
                    phrase,
                    target_entry,
                    min_relevance=min_relevance,
                    relevance_score=score,
                )
                assigned = False
                if not preview_only and meets_threshold and target_entry is not None:
                    assigned = store.add_entry_topic(
                        row.discovered_citation_key,
                        topic_slug=topic_slug,
                        topic_name=str(topic.get("name") or topic_slug),
                        source_type="topic_expand",
                        source_url=str(topic.get("source_url") or ""),
                        source_label=f"{source}:{relation_type}:{seed_key}",
                        confidence=score,
        for _round in range(max(1, max_rounds)):
            if not frontier:
                break
            next_frontier: list[str] = []
            for seed_key in frontier:
                if seed_key in seen_seed_keys:
                    continue
                seen_seed_keys.add(seed_key)
                if preview_only:
                    discovered_rows = self._preview_discoveries(
                        store,
                        seed_key,
                        source=source,
                        relation_type=relation_type,
                        limit=per_seed_limit,
                    )
                results.append(
                    TopicExpansionResult(
                        topic_slug=topic_slug,
                        source_citation_key=row.source_citation_key,
                        discovered_citation_key=row.discovered_citation_key,
                        discovered_title=str(target_entry.get("title") or ""),
                        created_entry=row.created_entry,
                        relation_type=row.relation_type,
                        source_label=row.source_label,
                else:
                    discovered_rows = self._materialized_discoveries(
                        store,
                        seed_key,
                        source=source,
                        relation_type=relation_type,
                        limit=per_seed_limit,
                    )

                for row, target_entry in discovered_rows:
                    score = _topic_relevance_score(phrase, target_entry)
                    meets_threshold = _meets_topic_assignment_threshold(
                        phrase,
                        target_entry,
                        min_relevance=min_relevance,
                        relevance_score=score,
                        meets_relevance_threshold=meets_threshold,
                        assigned_to_topic=assigned,
                    )
                )
                    assigned = False
                    if not preview_only and meets_threshold and target_entry is not None:
                        assigned = store.add_entry_topic(
                            row.discovered_citation_key,
                            topic_slug=topic_slug,
                            topic_name=str(topic.get("name") or topic_slug),
                            source_type="topic_expand",
                            source_url=str(topic.get("source_url") or ""),
                            source_label=f"{source}:{row.relation_type}:{seed_key}",
                            confidence=score,
                        )
                    if assigned and _entry_is_recent(target_entry, recent_years) and score >= 0.5:
                        recent_topic_hits.add(row.discovered_citation_key)
                    if _entry_is_recent(target_entry, recent_years):
                        recent_hits.add(row.discovered_citation_key)
                    if row.discovered_citation_key not in seen_seed_keys:
                        next_frontier.append(row.discovered_citation_key)
                    results.append(
                        TopicExpansionResult(
                            topic_slug=topic_slug,
                            source_citation_key=row.source_citation_key,
                            discovered_citation_key=row.discovered_citation_key,
                            discovered_title=str(target_entry.get("title") or ""),
                            created_entry=row.created_entry,
                            relation_type=row.relation_type,
                            source_label=row.source_label,
                            relevance_score=score,
                            meets_relevance_threshold=meets_threshold,
                            assigned_to_topic=assigned,
                        )
                    )
                    if target_recent_entries is not None and len(recent_topic_hits) >= target_recent_entries:
                        self.last_run_meta.update({
                            "stop_reason": "target_recent_entries",
                            "recent_hits": len(recent_hits),
                            "recent_topic_hits": len(recent_topic_hits),
                        })
                        store.connection.commit()
                        return results
            frontier = list(dict.fromkeys(next_frontier))
        self.last_run_meta.update({
            "stop_reason": "frontier_exhausted",
            "recent_hits": len(recent_hits),
            "recent_topic_hits": len(recent_topic_hits),
        })
        store.connection.commit()
        return results
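For orientation, a minimal sketch of driving the multi-round loop above. The topic slug and tuning values are illustrative, and the TopicExpander import path is assumed from the test modules further down; treat this as a sketch, not the canonical entry point.

# Illustrative only: "abiogenesis" and the numeric knobs are made-up inputs.
from citegeist import BibliographyStore
from citegeist.expand import TopicExpander  # assumed module path

store = BibliographyStore()
expander = TopicExpander()

# Walk the citation graph for up to three rounds, counting only entries from
# the last five years toward the early-stop target of ten on-topic hits.
results = expander.expand_topic(
    store,
    "abiogenesis",
    relation_type="both",
    max_rounds=3,
    recent_years=5,
    target_recent_entries=10,
)

# last_run_meta records why the loop ended: "target_recent_entries" when the
# recent-hit goal was met early, otherwise "frontier_exhausted".
print(expander.last_run_meta["stop_reason"], len(results))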
@@ -316,12 +363,16 @@ class TopicExpander:
        if source == "crossref":
            expansion_rows = self.crossref_expander.expand_entry_references(store, citation_key)
        else:
            expansion_rows = self.openalex_expander.expand_entry(
                store,
                citation_key,
                relation_type=relation_type,
                limit=limit,
            )
            expansion_rows: list[ExpansionResult] = []
            for relation_name in _expand_relation_types(relation_type):
                expansion_rows.extend(
                    self.openalex_expander.expand_entry(
                        store,
                        citation_key,
                        relation_type=relation_name,
                        limit=limit,
                    )
                )
        return [(row, store.get_entry(row.discovered_citation_key)) for row in expansion_rows]

    def _preview_discoveries(
@@ -334,7 +385,10 @@ class TopicExpander:
    ) -> list[tuple[ExpansionResult, dict[str, object]]]:
        if source == "crossref":
            return self._preview_crossref_discoveries(store, citation_key, limit)
        return self._preview_openalex_discoveries(store, citation_key, relation_type, limit)
        rows: list[tuple[ExpansionResult, dict[str, object]]] = []
        for relation_name in _expand_relation_types(relation_type):
            rows.extend(self._preview_openalex_discoveries(store, citation_key, relation_name, limit))
        return rows

    def _preview_crossref_discoveries(
        self,
@@ -399,11 +453,10 @@ class TopicExpander:
            if existing_key is None and _skip_openalex_review_like_duplicate(store, discovered):
                continue
            target_key = existing_key or discovered.citation_key
            source_key = citation_key if relation_type == "cites" else target_key
            rows.append(
                (
                    ExpansionResult(
                        source_citation_key=source_key,
                        source_citation_key=citation_key,
                        discovered_citation_key=target_key,
                        created_entry=existing_key is None and store.get_entry(discovered.citation_key) is None,
                        relation_type=relation_type,
@@ -441,6 +494,21 @@ def _crossref_reference_to_entry(reference: dict, source_citation_key: str, ordi
    return BibEntry(entry_type=entry_type, citation_key=citation_key, fields=fields)


def _expand_relation_types(relation_type: str) -> list[str]:
    if relation_type == "both":
        return ["cites", "cited_by"]
    return [relation_type]


def _entry_is_recent(entry: dict[str, object] | None, recent_years: int | None) -> bool:
    if entry is None or recent_years is None or recent_years < 0:
        return False
    year_value = str(entry.get("year") or "").strip()
    if not year_value.isdigit():
        return False
    return int(year_value) >= date.today().year - recent_years


def _crossref_reference_title(reference: dict, ordinal: int) -> str:
    raw_title = (
        reference.get("article-title")
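The two helpers above are pure functions, so their contract fits in a short doctest-style sketch; the dictionaries are made-up entries, and the recency check is relative to today's date.

from datetime import date

# "both" fans out into the two concrete OpenAlex directions; any other value
# passes through unchanged.
assert _expand_relation_types("both") == ["cites", "cited_by"]
assert _expand_relation_types("cites") == ["cites"]

# _entry_is_recent is deliberately conservative: a missing entry, a missing
# window, or a non-numeric year all count as "not recent".
assert _entry_is_recent(None, 5) is False
assert _entry_is_recent({"year": "n.d."}, 5) is False
assert _entry_is_recent({"year": str(date.today().year)}, 0) is True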
@@ -1049,6 +1049,13 @@ class BibliographyStore:
        return render_bibtex([entry])

    def export_bibtex(self, citation_keys: list[str] | None = None, include_stubs: bool | None = None) -> str:
        return self.export_bibtex_report(citation_keys, include_stubs=include_stubs)["bibtex"]

    def export_bibtex_report(
        self,
        citation_keys: list[str] | None = None,
        include_stubs: bool | None = None,
    ) -> dict[str, object]:
        explicit_keys = citation_keys is not None
        if include_stubs is None:
            include_stubs = explicit_keys
@@ -1058,7 +1065,6 @@ class BibliographyStore:
            ).fetchall()
            citation_keys = [str(row["citation_key"]) for row in rows]

        chunks: list[str] = []
        entries: list[BibEntry] = []
        for citation_key in citation_keys:
            entry = self._load_bib_entry(citation_key)
@@ -1066,9 +1072,27 @@ class BibliographyStore:
            if not include_stubs and self._is_export_stub(entry):
                continue
            entries.append(entry)
        if not entries:
            return ""
        return render_bibtex(entries)
        chunks: list[str] = []
        skipped: list[dict[str, str]] = []
        for entry in entries:
            try:
                rendered = render_bibtex([entry]).strip()
            except Exception as exc:
                skipped.append(
                    {
                        "citation_key": entry.citation_key,
                        "error": str(exc),
                    }
                )
                continue
            if rendered:
                chunks.append(rendered)
        return {
            "bibtex": "\n\n".join(chunks).strip(),
            "requested_count": len(entries),
            "exported_count": len(chunks),
            "skipped": skipped,
        }

    def _detect_fts5(self) -> bool:
        try:
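A consumer-side sketch of the report payload assembled above; the citation keys are placeholders.

# Hypothetical keys; the shape of `report` matches the return dict above.
report = store.export_bibtex_report(["good2024", "bad2024"])
print(report["requested_count"], "requested,", report["exported_count"], "exported")
for item in report["skipped"]:
    # Each skipped record pairs the citation key with the render error text.
    print(f"skipped {item['citation_key']}: {item['error']}")

# export_bibtex remains a thin wrapper that returns only the BibTeX string.
bibtex_text = store.export_bibtex(["good2024", "bad2024"])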
@@ -189,6 +189,12 @@ class TalkOriginsScraper:
        expand: bool = False,
        topic_limit: int = 5,
        topic_commit_limit: int | None = None,
        expansion_mode: str = "legacy",
        expansion_rounds: int = 1,
        recent_years: int | None = None,
        target_recent_entries: int | None = None,
        max_expanded_entries: int | None = None,
        max_expand_seconds: float | None = None,
        resume: bool = True,
    ) -> TalkOriginsBatchExport:
        output_root = Path(output_dir)
@@ -286,6 +292,12 @@ class TalkOriginsScraper:
                "status": review_status,
                "topic_limit": topic_limit,
                "topic_commit_limit": topic_commit_limit,
                "expansion_mode": expansion_mode,
                "expansion_rounds": expansion_rounds,
                "recent_years": recent_years,
                "target_recent_entries": target_recent_entries,
                "max_expanded_entries": max_expanded_entries,
                "max_expand_seconds": max_expand_seconds,
            }
        )
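The new knobs thread straight through to the bootstrap layer and are echoed into the batch metadata above. A usage sketch; the batch-export method's name sits outside this hunk, so export_batches below is a stand-in, and every value is illustrative.

scraper = TalkOriginsScraper()
# NOTE: export_batches is a placeholder name; only the keyword arguments are
# taken from the signature shown above.
export = scraper.export_batches(
    output_dir="ops/talkorigins",
    expand=True,
    expansion_mode="both",
    expansion_rounds=3,
    recent_years=5,
    target_recent_entries=10,
    max_expanded_entries=200,
    max_expand_seconds=60.0,
)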
@@ -6,7 +6,11 @@ from citegeist.expand import ExpansionResult


class FakeBootstrapper:
    def __init__(self) -> None:
        self.calls: list[dict] = []

    def bootstrap(self, store, **kwargs):
        self.calls.append(dict(kwargs))
        if not kwargs.get("preview_only"):
            store.ensure_topic("graph-topic", "Graph Topic", source_type="bootstrap", expansion_phrase="graph topic")
            store.upsert_entry(
@@ -40,7 +44,11 @@ class FakeBootstrapper:


class FakeTopicExpander:
    def __init__(self) -> None:
        self.calls: list[dict] = []

    def expand_topic(self, store, topic_slug, **kwargs):
        self.calls.append({"topic_slug": topic_slug, **kwargs})
        preview_only = kwargs.get("preview_only", False)
        if not preview_only:
            store.upsert_entry(
@@ -102,18 +110,99 @@ def test_literature_explorer_api_search_and_show_entry():
def test_literature_explorer_api_bootstrap_returns_topic_payload():
    store = BibliographyStore()
    try:
        api = LiteratureExplorerApi(store, bootstrapper=FakeBootstrapper())
        bootstrapper = FakeBootstrapper()
        api = LiteratureExplorerApi(store, bootstrapper=bootstrapper)
        payload = api.bootstrap(
            topic="graph topic",
            topic_slug="graph-topic",
            topic_name="Graph Topic",
            preview_only=False,
            expand=False,
            expansion_mode="both",
            expansion_rounds=3,
            recent_years=5,
            target_recent_entries=10,
            max_expanded_entries=120,
            max_expand_seconds=18.5,
        )

        assert payload["topic"]["slug"] == "graph-topic"
        assert payload["entries"][0]["citation_key"] == "topic2024graph"
        assert payload["results"][0]["citation_key"] == "topic2024graph"
        assert bootstrapper.calls[0]["expansion_mode"] == "both"
        assert bootstrapper.calls[0]["expansion_rounds"] == 3
        assert bootstrapper.calls[0]["recent_years"] == 5
        assert bootstrapper.calls[0]["target_recent_entries"] == 10
        assert bootstrapper.calls[0]["max_expanded_entries"] == 120
        assert bootstrapper.calls[0]["max_expand_seconds"] == 18.5
    finally:
        store.close()


def test_literature_explorer_api_exports_topic_bibtex():
    store = BibliographyStore()
    try:
        store.ingest_bibtex(
            """
@article{seed2024,
author = {Seed, Alice},
title = {Graph Seed},
year = {2024}
}
"""
        )
        store.add_entry_topic("seed2024", topic_slug="graph-topic", topic_name="Graph Topic", source_label="seed")
        api = LiteratureExplorerApi(store)

        payload = api.export_topic_bibtex("graph-topic")

        assert payload is not None
        assert payload["topic"]["slug"] == "graph-topic"
        assert payload["entry_count"] == 1
        assert "@article{seed2024," in payload["bibtex"]
    finally:
        store.close()


def test_literature_explorer_api_topic_export_skips_malformed_creator_entries():
    store = BibliographyStore()
    try:
        store.ingest_bibtex(
            """
@article{good2024,
author = {Seed, Alice},
title = {Usable Entry},
year = {2024}
}

@article{bad2024,
author = {Normal, Person},
title = {Broken Entry},
year = {2024}
}
"""
        )
        store.add_entry_topic("good2024", topic_slug="graph-topic", topic_name="Graph Topic", source_label="seed")
        store.add_entry_topic("bad2024", topic_slug="graph-topic", topic_name="Graph Topic", source_label="seed")
        store.connection.execute(
            """
            UPDATE creators
            SET full_name = 'Franck, Jean-Louis, Georges, MALIGE'
            WHERE full_name = 'Normal, Person'
            """
        )
        store.connection.commit()
        api = LiteratureExplorerApi(store)

        payload = api.export_topic_bibtex("graph-topic")

        assert payload is not None
        assert payload["entry_count"] == 2
        assert payload["exported_count"] == 1
        assert "@article{good2024," in payload["bibtex"]
        assert "@article{bad2024," not in payload["bibtex"]
        assert payload["skipped"][0]["citation_key"] == "bad2024"
        assert "Too many commas" in payload["skipped"][0]["error"]
    finally:
        store.close()
@@ -131,13 +220,25 @@ def test_literature_explorer_api_expand_topic_returns_updated_entries():
"""
        )
        store.add_entry_topic("seed2024", topic_slug="graph-topic", topic_name="Graph Topic", source_label="seed")
        api = LiteratureExplorerApi(store, topic_expander=FakeTopicExpander())
        topic_expander = FakeTopicExpander()
        api = LiteratureExplorerApi(store, topic_expander=topic_expander)

        payload = api.expand_topic("graph-topic", preview_only=False)
        payload = api.expand_topic(
            "graph-topic",
            preview_only=False,
            relation_type="both",
            max_rounds=3,
            recent_years=5,
            target_recent_entries=10,
        )

        assert payload is not None
        assert payload["results"][0]["discovered_citation_key"] == "discovered2025graph"
        assert any(item["citation_key"] == "discovered2025graph" for item in payload["entries"])
        assert topic_expander.calls[0]["relation_type"] == "both"
        assert topic_expander.calls[0]["max_rounds"] == 3
        assert topic_expander.calls[0]["recent_years"] == 5
        assert topic_expander.calls[0]["target_recent_entries"] == 10
    finally:
        store.close()
@@ -1,6 +1,32 @@
from types import SimpleNamespace

from citegeist import BibliographyStore
from citegeist.app_api import LiteratureExplorerApi
from citegeist.app_server import LiteratureExplorerAppServer, create_request_handler
from citegeist.bootstrap import BootstrapResult
from citegeist.app_server import (
    LiteratureExplorerAppServer,
    _extract_bearer_token,
    _request_is_authorized,
    create_request_handler,
)


class FakeBootstrapper:
    def __init__(self) -> None:
        self.calls: list[dict] = []

    def bootstrap(self, store, **kwargs):
        self.calls.append(dict(kwargs))
        return [
            BootstrapResult(
                citation_key="graph2026topic",
                origin="topic",
                created=True,
                score=4.0,
                title="Graph Topic Result",
                year="2026",
            )
        ]


def test_literature_explorer_app_server_dispatch_search():
@@ -24,6 +50,94 @@ def test_literature_explorer_app_server_dispatch_search():
        store.close()


def test_literature_explorer_app_server_dispatch_exports_topic_bibtex():
    store = BibliographyStore()
    try:
        store.ingest_bibtex(
            """
@article{seed2024,
author = {Seed, Alice},
title = {Graph Topic Result},
year = {2024}
}
"""
        )
        store.add_entry_topic("seed2024", topic_slug="graph-topic", topic_name="Graph Topic", source_label="seed")
        server = LiteratureExplorerAppServer(LiteratureExplorerApi(store))

        payload = server.dispatch("export_topic_bibtex", {"topic_slug": "graph-topic"})

        assert payload["entry_count"] == 1
        assert "@article{seed2024," in payload["bibtex"]
    finally:
        store.close()


def test_literature_explorer_app_server_dispatch_expand_topic_with_new_controls():
    store = BibliographyStore()
    try:
        store.ingest_bibtex(
            """
@article{seed2024,
author = {Seed, Alice},
title = {Graph Topic Result},
year = {2024}
}
"""
        )
        store.add_entry_topic("seed2024", topic_slug="graph-topic", topic_name="Graph Topic", source_label="seed")
        server = LiteratureExplorerAppServer(LiteratureExplorerApi(store))

        payload = server.dispatch(
            "expand_topic",
            {
                "topic_slug": "graph-topic",
                "relation_type": "both",
                "max_rounds": 3,
                "recent_years": 5,
                "target_recent_entries": 10,
                "preview_only": True,
            },
        )

        assert payload["preview"] is True
        assert "results" in payload
    finally:
        store.close()


def test_literature_explorer_app_server_dispatch_bootstrap_with_new_caps():
    store = BibliographyStore()
    try:
        bootstrapper = FakeBootstrapper()
        server = LiteratureExplorerAppServer(LiteratureExplorerApi(store, bootstrapper=bootstrapper))

        payload = server.dispatch(
            "bootstrap",
            {
                "topic": "graph topic",
                "topic_slug": "graph-topic",
                "topic_name": "Graph Topic",
                "preview_only": True,
                "expand": False,
                "expansion_mode": "cites",
                "expansion_rounds": 2,
                "recent_years": 5,
                "target_recent_entries": 4,
                "max_expanded_entries": 75,
                "max_expand_seconds": 12.5,
            },
        )

        assert payload["preview"] is True
        assert "results" in payload
        assert bootstrapper.calls[0]["expansion_mode"] == "cites"
        assert bootstrapper.calls[0]["max_expanded_entries"] == 75
        assert bootstrapper.calls[0]["max_expand_seconds"] == 12.5
    finally:
        store.close()


def test_literature_explorer_http_handler_class_can_be_created():
    store = BibliographyStore()
    try:
@@ -43,3 +157,29 @@ def test_literature_explorer_http_handler_class_can_be_created():
        assert issubclass(handler, object)
    finally:
        store.close()


def test_request_authorization_accepts_bearer_and_header_token():
    headers = SimpleNamespace(
        get=lambda key, default="": {
            "Authorization": "Bearer secret-token",
            "X-API-Token": "secret-token",
        }.get(key, default)
    )

    assert _extract_bearer_token(headers) == "secret-token"
    assert _request_is_authorized(headers, "secret-token") is True


def test_request_authorization_rejects_missing_or_wrong_token():
    missing_headers = SimpleNamespace(get=lambda key, default="": default)
    wrong_headers = SimpleNamespace(
        get=lambda key, default="": {
            "Authorization": "Bearer wrong-token",
            "X-API-Token": "",
        }.get(key, default)
    )

    assert _request_is_authorized(missing_headers, "secret-token") is False
    assert _request_is_authorized(wrong_headers, "secret-token") is False
    assert _request_is_authorized(missing_headers, None) is True
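Grounded in the two accepted header forms above, a client call might look like the sketch below. The endpoint path and JSON envelope are assumptions (only the headers are pinned down by these tests); the URL and token are placeholders.

import json
import urllib.request

token = "secret-token"  # placeholder; must match the server's configured token
request = urllib.request.Request(
    "http://127.0.0.1:8765/api",  # assumed endpoint, not fixed by these tests
    data=json.dumps({"action": "expand_topic", "params": {"topic_slug": "graph-topic"}}).encode("utf-8"),
    headers={
        "Content-Type": "application/json",
        # Either header satisfies _request_is_authorized:
        "Authorization": f"Bearer {token}",
        # "X-API-Token": token,  # equivalent alternative
    },
)
with urllib.request.urlopen(request) as response:
    payload = json.loads(response.read())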
@@ -127,3 +127,93 @@ def test_bootstrap_batch_cli_runs_json_jobs(tmp_path: Path):
    exit_code = main(["--db", str(database), "bootstrap-batch", str(batch_json)])

    assert exit_code == 0


def test_batch_runner_passes_new_expansion_settings(tmp_path: Path):
    jobs = [
        {
            "name": "topic-job",
            "topic": "graph topic",
            "expand": True,
            "expansion_mode": "both",
            "expansion_rounds": 3,
            "recent_years": 5,
            "target_recent_entries": 12,
        }
    ]

    runner = BatchBootstrapRunner()
    store = BibliographyStore()
    try:
        captured: dict[str, object] = {}

        def fake_bootstrap(_store, **kwargs):
            captured.update(kwargs)
            return []

        runner.bootstrapper.bootstrap = fake_bootstrap  # type: ignore[method-assign]
        runner.run(store, jobs)

        assert captured["expansion_mode"] == "both"
        assert captured["expansion_rounds"] == 3
        assert captured["recent_years"] == 5
        assert captured["target_recent_entries"] == 12
        assert captured["max_expanded_entries"] is None
    finally:
        store.close()


def test_batch_runner_passes_max_expanded_entries(tmp_path: Path):
    jobs = [
        {
            "name": "topic-job",
            "topic": "graph topic",
            "expand": True,
            "expansion_mode": "cites",
            "max_expanded_entries": 25,
        }
    ]

    runner = BatchBootstrapRunner()
    store = BibliographyStore()
    try:
        captured: dict[str, object] = {}

        def fake_bootstrap(_store, **kwargs):
            captured.update(kwargs)
            return []

        runner.bootstrapper.bootstrap = fake_bootstrap  # type: ignore[method-assign]
        runner.run(store, jobs)

        assert captured["max_expanded_entries"] == 25
    finally:
        store.close()


def test_batch_runner_passes_max_expand_seconds(tmp_path: Path):
    jobs = [
        {
            "name": "topic-job",
            "topic": "graph topic",
            "expand": True,
            "expansion_mode": "legacy",
            "max_expand_seconds": 12.5,
        }
    ]

    runner = BatchBootstrapRunner()
    store = BibliographyStore()
    try:
        captured: dict[str, object] = {}

        def fake_bootstrap(_store, **kwargs):
            captured.update(kwargs)
            return []

        runner.bootstrapper.bootstrap = fake_bootstrap  # type: ignore[method-assign]
        runner.run(store, jobs)

        assert captured["max_expand_seconds"] == 12.5
    finally:
        store.close()
@@ -1,6 +1,7 @@
from citegeist import BibliographyStore
from citegeist.bootstrap import Bootstrapper
from citegeist.cli import main
from citegeist.expand import ExpansionResult


def test_bootstrap_from_seed_bib_only():
@@ -299,6 +300,169 @@ def test_bootstrap_topic_commit_requires_title_anchor():
        store.close()


def test_bootstrap_nonlegacy_both_mode_expands_both_relations():
    store = BibliographyStore()
    try:
        bootstrapper = Bootstrapper()
        calls: list[tuple[str, str, int]] = []

        bootstrapper.openalex_expander.expand_entry = lambda _store, key, relation_type="cites", limit=5: (  # type: ignore[method-assign]
            calls.append((key, relation_type, limit)) or []
        )

        bootstrapper.bootstrap(
            store,
            seed_bibtex="""
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024}
}
""",
            expansion_mode="both",
            expand=True,
        )

        assert calls == [("seed2024", "cites", 5), ("seed2024", "cited_by", 5)]
    finally:
        store.close()


def test_bootstrap_recent_target_stops_recursive_openalex_expansion():
    store = BibliographyStore()
    try:
        bootstrapper = Bootstrapper()
        from citegeist import BibEntry

        store.upsert_entry(
            BibEntry(entry_type="article", citation_key="recent2026", fields={"title": "Recent discovery", "year": "2026"}),
            source_type="graph_expand",
            source_label="test",
            review_status="draft",
        )
        store.connection.commit()

        def fake_expand(_store, key, relation_type="cites", limit=5):
            if key == "seed2024":
                return [
                    ExpansionResult(
                        "seed2024",
                        "recent2026",
                        False,
                        relation_type,
                        f"openalex:{relation_type}:seed2024",
                    )
                ]
            return []

        bootstrapper.openalex_expander.expand_entry = fake_expand  # type: ignore[method-assign]

        results = bootstrapper.bootstrap(
            store,
            seed_bibtex="""
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024}
}
""",
            expansion_mode="cites",
            expansion_rounds=3,
            recent_years=2,
            target_recent_entries=1,
            expand=True,
        )

        assert [item.origin for item in results][-1] == "openalex_expand:cites"
        assert [item.citation_key for item in results if item.origin.startswith("openalex_expand")] == ["recent2026"]
    finally:
        store.close()


def test_bootstrap_max_expanded_entries_caps_growth():
    store = BibliographyStore()
    try:
        bootstrapper = Bootstrapper()
        from citegeist import BibEntry

        store.upsert_entry(
            BibEntry(entry_type="article", citation_key="d1", fields={"title": "Discovery One", "year": "2024"}),
            source_type="graph_expand",
            source_label="test",
            review_status="draft",
        )
        store.upsert_entry(
            BibEntry(entry_type="article", citation_key="d2", fields={"title": "Discovery Two", "year": "2024"}),
            source_type="graph_expand",
            source_label="test",
            review_status="draft",
        )
        store.connection.commit()

        bootstrapper.openalex_expander.expand_entry = lambda _store, key, relation_type="cites", limit=5: (  # type: ignore[method-assign]
            [
                ExpansionResult(key, "d1", False, relation_type, f"openalex:{relation_type}:{key}"),
                ExpansionResult(key, "d2", False, relation_type, f"openalex:{relation_type}:{key}"),
            ]
            if key == "seed2024"
            else []
        )

        results = bootstrapper.bootstrap(
            store,
            seed_bibtex="""
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024}
}
""",
            expansion_mode="cites",
            expand=True,
            max_expanded_entries=1,
        )

        assert [item.citation_key for item in results if item.origin.startswith("openalex_expand")] == ["d1"]
    finally:
        store.close()


def test_bootstrap_max_expand_seconds_stops_legacy_expansion(monkeypatch):
    store = BibliographyStore()
    try:
        bootstrapper = Bootstrapper()
        ticks = iter([0.0, 0.0, 2.0, 2.0, 2.0])
        monkeypatch.setattr("citegeist.bootstrap.time.monotonic", lambda: next(ticks))
        calls: list[str] = []

        bootstrapper.crossref_expander.expand_entry_references = lambda _store, key: (calls.append(f"crossref:{key}") or [])  # type: ignore[method-assign]
        bootstrapper.openalex_expander.expand_entry = lambda _store, key, relation_type="cites", limit=5: (calls.append(f"openalex:{key}") or [])  # type: ignore[method-assign]

        bootstrapper.bootstrap(
            store,
            seed_bibtex="""
@article{seed2024,
author = {Seed, Alice},
title = {Seed Paper},
year = {2024}
}

@article{seed2023,
author = {Seed, Bob},
title = {Older Seed},
year = {2023}
}
""",
            expansion_mode="legacy",
            expand=True,
            max_expand_seconds=1.0,
        )

        assert len(calls) <= 2
    finally:
        store.close()


def test_bootstrap_preview_uses_topic_commit_limit_when_larger_than_topic_limit():
    store = BibliographyStore()
    try:
@@ -11,8 +11,10 @@ from citegeist.storage import BibliographyStore
class FakeOpenAlexExpander:
    def __init__(self, results: list[ExpansionResult] | dict[str, list[ExpansionResult]]) -> None:
        self.results = results
        self.calls: list[tuple[str, str, int]] = []

    def expand_entry(self, store, citation_key, relation_type="cites", limit=25):
        self.calls.append((citation_key, relation_type, limit))
        if isinstance(self.results, dict):
            return list(self.results.get(citation_key, []))
        return list(self.results)
@@ -216,6 +218,101 @@ def test_topic_expander_preview_discovers_without_writing():
        store.close()


def test_topic_expander_relation_type_both_uses_both_openalex_directions():
    store = BibliographyStore()
    try:
        store.ingest_bibtex(
            """
@article{seed2024,
author = {Seed, Alice},
title = {Abiogenesis Seed Paper},
year = {2024}
}
"""
        )
        store.add_entry_topic(
            "seed2024",
            topic_slug="abiogenesis",
            topic_name="Abiogenesis",
            source_type="talkorigins",
            source_url="https://example.org/topics/abiogenesis",
            source_label="seed",
        )
        fake_expander = FakeOpenAlexExpander([])
        expander = TopicExpander(openalex_expander=fake_expander)

        expander.expand_topic(store, "abiogenesis", relation_type="both")

        assert [relation for _seed, relation, _limit in fake_expander.calls] == ["cites", "cited_by"]
    finally:
        store.close()


def test_topic_expander_stops_once_recent_target_is_reached():
    store = BibliographyStore()
    try:
        store.ingest_bibtex(
            """
@article{seed2024,
author = {Seed, Alice},
title = {Abiogenesis Seed Paper},
year = {2024}
}
"""
        )
        store.add_entry_topic(
            "seed2024",
            topic_slug="abiogenesis",
            topic_name="Abiogenesis",
            source_type="talkorigins",
            source_url="https://example.org/topics/abiogenesis",
            source_label="seed",
        )
        store.upsert_entry(
            BibEntry(
                entry_type="article",
                citation_key="recent1",
                fields={"title": "Abiogenesis pathways", "abstract": "abiogenesis", "year": "2026"},
            ),
            source_type="graph_expand",
            source_label="test",
            review_status="draft",
        )
        store.upsert_entry(
            BibEntry(
                entry_type="article",
                citation_key="recent2",
                fields={"title": "Abiogenesis chemistry", "abstract": "abiogenesis", "year": "2025"},
            ),
            source_type="graph_expand",
            source_label="test",
            review_status="draft",
        )
        store.connection.commit()

        fake_expander = FakeOpenAlexExpander(
            {
                "seed2024": [ExpansionResult("seed2024", "recent1", False, "cites", "openalex:cites:seed2024")],
                "recent1": [ExpansionResult("recent1", "recent2", False, "cites", "openalex:cites:recent1")],
            }
        )
        expander = TopicExpander(openalex_expander=fake_expander)

        results = expander.expand_topic(
            store,
            "abiogenesis",
            topic_phrase="abiogenesis chemistry",
            max_rounds=3,
            recent_years=2,
            target_recent_entries=1,
        )

        assert [item.discovered_citation_key for item in results] == ["recent1"]
        assert fake_expander.calls == [("seed2024", "cites", 25)]
    finally:
        store.close()


def test_topic_relevance_score_expands_human_evolution_terms():
    score = _topic_relevance_score(
        "human evolution",