Added literature explorer example

This commit is contained in:
welsberr 2026-04-07 04:09:09 +00:00
parent 2459830b70
commit 7bdaf37c59
17 changed files with 1542 additions and 89 deletions

1
.gitignore vendored
View File

@ -5,3 +5,4 @@ __pycache__/
*.pyc
*.egg-info/
library.sqlite3
ops/

View File

@ -0,0 +1,2 @@
abd358981999c08a4f777efa4f38f2d4990c42e3421342cf9e9b460bf9700e41

View File

@ -52,6 +52,8 @@
position: sticky;
top: 1rem;
align-self: start;
max-height: calc(100vh - 2rem);
overflow-y: auto;
}
.content {
@ -191,6 +193,8 @@
.status.error { color: var(--danger); }
.status.ok { color: var(--accent-2); }
.status.note { color: #5d4716; }
.meta-grid {
display: grid;
gap: 0.5rem;
@ -382,6 +386,61 @@
margin-top: 0.75rem;
}
.api-reference {
display: grid;
gap: 0.85rem;
}
.summary-box {
margin-top: 0.75rem;
padding: 0.8rem 0.9rem;
border-radius: 16px;
background: rgba(255, 255, 255, 0.74);
border: 1px solid rgba(73, 57, 35, 0.11);
display: grid;
gap: 0.35rem;
}
.summary-box strong {
color: var(--ink);
}
.endpoint-card {
border-radius: 18px;
border: 1px solid rgba(73, 57, 35, 0.11);
background: rgba(255, 255, 255, 0.74);
padding: 0.95rem 1rem;
display: grid;
gap: 0.55rem;
}
.endpoint-head {
display: flex;
gap: 0.6rem;
align-items: center;
flex-wrap: wrap;
}
.endpoint-method {
display: inline-flex;
align-items: center;
justify-content: center;
min-width: 4.4rem;
padding: 0.28rem 0.55rem;
border-radius: 999px;
background: var(--accent-2);
color: #f7fbfb;
font-size: 0.78rem;
font-weight: 700;
letter-spacing: 0.04em;
}
.endpoint-path {
font-family: "IBM Plex Mono", "SFMono-Regular", monospace;
font-size: 0.9rem;
color: var(--ink);
}
@media (max-width: 1080px) {
.shell { grid-template-columns: 1fr; }
.sidebar { position: static; }
@ -403,11 +462,16 @@
<h2>Bridge</h2>
<label>
Server URL
<input id="server-url" value="http://127.0.0.1:8765" />
<input id="server-url" value="" />
</label>
<label>
API Token
<input id="api-token" type="password" value="" placeholder="Bearer token for /api access" />
</label>
<div class="toolbar">
<button id="connect-button" class="primary">Connect</button>
<button id="refresh-topics-button" class="tertiary">Refresh Topics</button>
<button id="api-reference-button" class="secondary">API Reference</button>
</div>
<div id="connect-status" class="status">Not connected.</div>
</section>
@ -442,10 +506,47 @@
<input id="bootstrap-status" value="draft" />
</label>
</div>
<div class="row-3">
<label>
Expansion Mode
<select id="bootstrap-expansion-mode">
<option value="legacy">legacy</option>
<option value="cites">cites</option>
<option value="cited_by">cited_by</option>
<option value="both">both</option>
</select>
</label>
<label>
Rounds
<input id="bootstrap-expansion-rounds" type="number" min="1" value="3" />
</label>
<label>
Recent Years
<input id="bootstrap-recent-years" type="number" min="0" value="5" />
</label>
</div>
<div class="row-3">
<label>
Recent Target
<input id="bootstrap-target-recent" type="number" min="1" value="5" />
</label>
<label>
Max Expanded Entries
<input id="bootstrap-max-expanded-entries" type="number" min="1" value="100" />
</label>
<label>
Max Expand Seconds
<input id="bootstrap-max-expand-seconds" type="number" min="1" step="0.5" value="20" />
</label>
</div>
<div class="toolbar">
<button id="bootstrap-preview-button" class="secondary">Preview Bootstrap</button>
<button id="bootstrap-commit-button" class="primary">Commit Bootstrap</button>
</div>
<div id="bootstrap-summary" class="summary-box">
<strong>Bootstrap Policy</strong>
<p>Use graph-limited bootstrap when you want topic seeding and expansion in one pass. The same expansion policy applies to preview and commit.</p>
</div>
</section>
<section class="panel card">
@ -471,6 +572,7 @@
<select id="expand-relation">
<option value="cites">cites</option>
<option value="cited_by">cited_by</option>
<option value="both">both</option>
</select>
</label>
<label>
@ -488,10 +590,28 @@
<input id="expand-per-seed-limit" type="number" min="1" value="12" />
</label>
</div>
<div class="row-3">
<label>
Rounds
<input id="expand-rounds" type="number" min="1" value="3" />
</label>
<label>
Recent Years
<input id="expand-recent-years" type="number" min="0" value="5" />
</label>
<label>
Recent Target
<input id="expand-target-recent" type="number" min="1" value="10" />
</label>
</div>
<div class="toolbar">
<button id="expand-preview-button" class="secondary">Preview Expansion</button>
<button id="expand-commit-button" class="primary">Apply Expansion</button>
</div>
<div id="expand-summary" class="summary-box">
<strong>Expansion Policy</strong>
<p>Use <code>cites</code> to bias toward newer work, or <code>both</code> for broader graph growth. Recursive rounds stop once the recent-entry target is met.</p>
</div>
</section>
<section class="panel card">
@ -579,14 +699,83 @@
<h2>Graph View</h2>
<div id="graph-output" class="empty">Load a topic to view a small local network around its first few entries.</div>
</section>
<section id="api-reference" class="panel card">
<h2>API Reference</h2>
<div class="api-reference">
<div class="endpoint-card">
<div class="endpoint-head">
<span class="endpoint-method">GET</span>
<span class="endpoint-path">/api/healthz</span>
</div>
<p>Health check endpoint. Does not require a bearer token.</p>
</div>
<div class="endpoint-card">
<div class="endpoint-head">
<span class="endpoint-method">GET</span>
<span class="endpoint-path">/api/capabilities</span>
</div>
<p>Returns the available operation names and preview-capable actions. Requires <code>Authorization: Bearer &lt;token&gt;</code>.</p>
</div>
<div class="endpoint-card">
<div class="endpoint-head">
<span class="endpoint-method">POST</span>
<span class="endpoint-path">/api/call</span>
</div>
<p>RPC-style API entry point for search, topic loading, graph traversal, extraction, verification, and topic BibTeX export.</p>
<div class="code-block">{
"method": "expand_topic",
"params": {
"topic_slug": "acraniates",
"relation_type": "cites",
"max_rounds": 3,
"recent_years": 5,
"target_recent_entries": 10
}
}</div>
</div>
<div class="endpoint-card">
<div class="endpoint-head">
<span class="endpoint-method">POST</span>
<span class="endpoint-path">/api/call bootstrap policy</span>
</div>
<p>Bootstrap also accepts expansion policy controls when you want bounded topic seeding plus graph growth in one step.</p>
<div class="code-block">{
"method": "bootstrap",
"params": {
"topic": "abiogenesis",
"topic_slug": "abiogenesis",
"expansion_mode": "cites",
"expansion_rounds": 3,
"recent_years": 5,
"target_recent_entries": 5,
"max_expanded_entries": 100,
"max_expand_seconds": 20
}
}</div>
</div>
<div class="endpoint-card">
<div class="endpoint-head">
<span class="endpoint-method">AUTH</span>
<span class="endpoint-path">Bearer token</span>
</div>
<p>Set the token in the sidebar once. The demo stores it in localStorage and attaches it to subsequent <code>/api/*</code> requests.</p>
</div>
</div>
</section>
</main>
</div>
<script type="module">
import { createHttpBridge, createLiteratureExplorerClient } from "./literature-explorer.js";
const DEFAULT_BRIDGE_URL = window.location.origin.startsWith("http")
? `${window.location.origin}/api`
: "http://127.0.0.1:8765";
const state = {
bridgeUrl: "http://127.0.0.1:8765",
bridgeUrl: localStorage.getItem("citegeist.bridgeUrl") || DEFAULT_BRIDGE_URL,
apiToken: localStorage.getItem("citegeist.apiToken") || "",
client: null,
topics: [],
activeTopic: null,
@ -595,8 +784,10 @@
const els = {
serverUrl: document.getElementById("server-url"),
apiToken: document.getElementById("api-token"),
connectButton: document.getElementById("connect-button"),
refreshTopicsButton: document.getElementById("refresh-topics-button"),
apiReferenceButton: document.getElementById("api-reference-button"),
connectStatus: document.getElementById("connect-status"),
topicsList: document.getElementById("topics-list"),
topicView: document.getElementById("topic-view"),
@ -614,8 +805,15 @@
bootstrapTopicLimit: document.getElementById("bootstrap-topic-limit"),
bootstrapCommitLimit: document.getElementById("bootstrap-commit-limit"),
bootstrapStatus: document.getElementById("bootstrap-status"),
bootstrapExpansionMode: document.getElementById("bootstrap-expansion-mode"),
bootstrapExpansionRounds: document.getElementById("bootstrap-expansion-rounds"),
bootstrapRecentYears: document.getElementById("bootstrap-recent-years"),
bootstrapTargetRecent: document.getElementById("bootstrap-target-recent"),
bootstrapMaxExpandedEntries: document.getElementById("bootstrap-max-expanded-entries"),
bootstrapMaxExpandSeconds: document.getElementById("bootstrap-max-expand-seconds"),
bootstrapPreviewButton: document.getElementById("bootstrap-preview-button"),
bootstrapCommitButton: document.getElementById("bootstrap-commit-button"),
bootstrapSummary: document.getElementById("bootstrap-summary"),
expandTopicSlug: document.getElementById("expand-topic-slug"),
expandTopicPhrase: document.getElementById("expand-topic-phrase"),
expandSource: document.getElementById("expand-source"),
@ -623,8 +821,12 @@
expandMinRelevance: document.getElementById("expand-min-relevance"),
expandSeedLimit: document.getElementById("expand-seed-limit"),
expandPerSeedLimit: document.getElementById("expand-per-seed-limit"),
expandRounds: document.getElementById("expand-rounds"),
expandRecentYears: document.getElementById("expand-recent-years"),
expandTargetRecent: document.getElementById("expand-target-recent"),
expandPreviewButton: document.getElementById("expand-preview-button"),
expandCommitButton: document.getElementById("expand-commit-button"),
expandSummary: document.getElementById("expand-summary"),
searchQuery: document.getElementById("search-query"),
searchTopic: document.getElementById("search-topic"),
searchButton: document.getElementById("search-button"),
@ -633,6 +835,9 @@
verifyButton: document.getElementById("verify-button"),
};
els.serverUrl.value = state.bridgeUrl;
els.apiToken.value = state.apiToken;
function setStatus(text, kind = "") {
els.connectStatus.textContent = text;
els.connectStatus.className = `status ${kind}`.trim();
@ -706,6 +911,9 @@
${topic.expansion_phrase ? `<span class="pill">${escapeHtml(topic.expansion_phrase)}</span>` : ""}
${topic.source_url ? `<span class="pill">${escapeHtml(topic.source_url)}</span>` : ""}
</div>
<div class="toolbar">
<button type="button" class="secondary" data-export-topic="${escapeHtml(topic.slug)}">Export Topic BibTeX</button>
</div>
</div>
${entries.map((entry) => `
<div class="list-item">
@ -724,6 +932,11 @@
await loadEntry(node.getAttribute("data-entry-key"));
});
});
els.topicView.querySelectorAll("[data-export-topic]").forEach((node) => {
node.addEventListener("click", async () => {
await exportTopicBibtex(node.getAttribute("data-export-topic"));
});
});
}
function renderEntry(entry) {
@ -842,11 +1055,40 @@
els.extractVerifyOutput.textContent = JSON.stringify(payload, null, 2);
}
// Paint a human-readable recap of the latest expansion run into the
// summary box; silently does nothing when the box is absent from the DOM.
function renderExpandSummary(payload) {
  const box = els.expandSummary;
  if (!box) {
    return;
  }
  const discoveries = payload?.results || [];
  let assignedCount = 0;
  for (const item of discoveries) {
    if (item.assigned_to_topic) {
      assignedCount += 1;
    }
  }
  const meta = payload?.run_meta || {};
  box.innerHTML = `
<strong>Expansion Summary</strong>
<p>${discoveries.length} discoveries returned · ${assignedCount} assigned to topic · relation ${escapeHtml(els.expandRelation.value)} · rounds ${escapeHtml(els.expandRounds.value)}</p>
<p>Recent target: ${escapeHtml(els.expandTargetRecent.value)} within ${escapeHtml(els.expandRecentYears.value)} years. Stop reason: <strong>${escapeHtml(meta.stop_reason || "unknown")}</strong>.</p>
<p>Recent hits: ${escapeHtml(meta.recent_hits ?? 0)} · recent topic hits: ${escapeHtml(meta.recent_topic_hits ?? 0)}.</p>
`;
}
// Paint a recap of the latest bootstrap run (candidate/created counts plus
// the policy caps in effect) into the bootstrap summary box, if present.
function renderBootstrapSummary(payload) {
  const box = els.bootstrapSummary;
  if (!box) {
    return;
  }
  const candidates = payload?.results || [];
  let createdCount = 0;
  for (const item of candidates) {
    if (item.created) {
      createdCount += 1;
    }
  }
  const meta = payload?.run_meta || {};
  box.innerHTML = `
<strong>Bootstrap Summary</strong>
<p>${candidates.length} candidate entries returned · ${createdCount} newly created in this pass · mode ${escapeHtml(els.bootstrapExpansionMode.value)} · rounds ${escapeHtml(els.bootstrapExpansionRounds.value)}</p>
<p>Recent target: ${escapeHtml(els.bootstrapTargetRecent.value)} within ${escapeHtml(els.bootstrapRecentYears.value)} years · caps ${escapeHtml(els.bootstrapMaxExpandedEntries.value)} entries / ${escapeHtml(els.bootstrapMaxExpandSeconds.value)} seconds.</p>
<p>Stop reason: <strong>${escapeHtml(meta.stop_reason || "unknown")}</strong> · expanded discoveries: ${escapeHtml(meta.expanded_discoveries ?? 0)} · recent topic hits: ${escapeHtml(meta.recent_topic_hits ?? 0)}.</p>
`;
}
async function connect() {
setBusy(els.connectButton, true);
try {
state.bridgeUrl = els.serverUrl.value.trim() || state.bridgeUrl;
const bridge = createHttpBridge(state.bridgeUrl);
state.apiToken = els.apiToken.value.trim();
localStorage.setItem("citegeist.bridgeUrl", state.bridgeUrl);
localStorage.setItem("citegeist.apiToken", state.apiToken);
const bridge = createHttpBridge(state.bridgeUrl, { token: state.apiToken });
const client = createLiteratureExplorerClient(bridge);
const capabilities = await client.capabilities();
state.client = client;
@ -919,10 +1161,17 @@
topic_limit: Number(els.bootstrapTopicLimit.value || 5),
topic_commit_limit: Number(els.bootstrapCommitLimit.value || 0) || null,
preview_only: previewOnly,
expand: false,
expand: els.bootstrapExpansionMode.value !== "legacy",
review_status: els.bootstrapStatus.value.trim() || "draft",
expansion_mode: els.bootstrapExpansionMode.value,
expansion_rounds: Number(els.bootstrapExpansionRounds.value || 1),
recent_years: Number(els.bootstrapRecentYears.value || 0) || null,
target_recent_entries: Number(els.bootstrapTargetRecent.value || 0) || null,
max_expanded_entries: Number(els.bootstrapMaxExpandedEntries.value || 0) || null,
max_expand_seconds: Number(els.bootstrapMaxExpandSeconds.value || 0) || null,
});
renderExtractVerify(payload);
renderBootstrapSummary(payload);
setLastOp(previewOnly ? "bootstrap_preview" : "bootstrap_commit");
logActivity(previewOnly ? "bootstrap_preview" : "bootstrap_commit", payload);
if (!previewOnly) {
@ -954,9 +1203,13 @@
min_relevance: Number(els.expandMinRelevance.value || 0.2),
seed_limit: Number(els.expandSeedLimit.value || 10),
per_seed_limit: Number(els.expandPerSeedLimit.value || 12),
max_rounds: Number(els.expandRounds.value || 1),
recent_years: Number(els.expandRecentYears.value || 0),
target_recent_entries: Number(els.expandTargetRecent.value || 0) || null,
preview_only: previewOnly,
});
renderExtractVerify(payload);
renderExpandSummary(payload);
setLastOp(previewOnly ? "expand_preview" : "expand_commit");
logActivity(previewOnly ? "expand_preview" : "expand_commit", payload);
if (!previewOnly && topicSlug) {
@ -1027,6 +1280,29 @@
}
}
// Export a topic's entries as a downloadable BibTeX file via the bridge API.
// Fix: the status messages contained a literal "$(unknown)" — the placeholder
// used $() instead of ${}, so the filename was never interpolated.
async function exportTopicBibtex(topicSlug) {
  if (!state.client || !topicSlug) {
    setStatus("Connect to the server first.", "error");
    return;
  }
  try {
    const payload = await state.client.exportTopicBibtex(topicSlug, { include_stubs: false });
    const filename = `${topicSlug}.bib`;
    downloadText(filename, payload?.bibtex || "");
    renderExtractVerify(payload);
    setLastOp("export_topic_bibtex");
    logActivity(`export_topic_bibtex:${topicSlug}`, payload);
    const skippedCount = Array.isArray(payload?.skipped) ? payload.skipped.length : 0;
    if (skippedCount) {
      setStatus(`Exported ${filename} with ${skippedCount} skipped malformed entr${skippedCount === 1 ? "y" : "ies"}.`, "ok");
    } else {
      setStatus(`Exported ${filename}.`, "ok");
    }
  } catch (error) {
    setStatus(String(error.message || error), "error");
  }
}
function escapeHtml(value) {
return String(value ?? "")
.replaceAll("&", "&amp;")
@ -1068,8 +1344,23 @@
});
}
// Trigger a client-side download of `text` as a BibTeX file named `filename`
// by clicking a transient object-URL anchor, then releasing the URL.
function downloadText(filename, text) {
  const blob = new Blob([text], { type: "application/x-bibtex; charset=utf-8" });
  const objectUrl = URL.createObjectURL(blob);
  const link = Object.assign(document.createElement("a"), {
    href: objectUrl,
    download: filename,
  });
  document.body.appendChild(link);
  link.click();
  link.remove();
  URL.revokeObjectURL(objectUrl);
}
// Wire sidebar and panel controls to their handlers.
els.connectButton.addEventListener("click", connect);
els.refreshTopicsButton.addEventListener("click", refreshTopics);
// Smooth-scroll to the in-page API reference section.
els.apiReferenceButton.addEventListener("click", () => {
document.getElementById("api-reference").scrollIntoView({ behavior: "smooth", block: "start" });
});
// Bootstrap/expand buttons share one handler each; `true` means preview-only.
els.bootstrapPreviewButton.addEventListener("click", () => runBootstrap(true));
els.bootstrapCommitButton.addEventListener("click", () => runBootstrap(false));
els.expandPreviewButton.addEventListener("click", () => runExpand(true));

View File

@ -15,6 +15,9 @@ export function createLiteratureExplorerClient(bridge) {
getTopic(topicSlug, options = {}) {
return bridge.call("get_topic", { topic_slug: topicSlug, ...options });
},
exportTopicBibtex(topicSlug, options = {}) {
return bridge.call("export_topic_bibtex", { topic_slug: topicSlug, ...options });
},
bootstrap(options = {}) {
return bridge.call("bootstrap", options);
},
@ -36,12 +39,23 @@ export function createLiteratureExplorerClient(bridge) {
};
}
export function createHttpBridge(baseUrl = "http://127.0.0.1:8765") {
// Derive the default bridge base URL: same-origin "/api" when running in a
// browser with a usable origin, otherwise the local development server.
function defaultApiBaseUrl() {
  const origin =
    typeof window !== "undefined" && window.location?.origin
      ? window.location.origin
      : null;
  return origin ? `${origin}/api` : "http://127.0.0.1:8765";
}
export function createHttpBridge(baseUrl = defaultApiBaseUrl(), options = {}) {
const token = String(options.token || "").trim();
return {
async call(method, params = {}) {
const response = await fetch(`${baseUrl}/call`, {
method: "POST",
headers: { "Content-Type": "application/json" },
headers: {
"Content-Type": "application/json",
...(token ? { Authorization: `Bearer ${token}` } : {}),
},
body: JSON.stringify({ method, params }),
});
const payload = await response.json();

View File

@ -33,6 +33,7 @@ class LiteratureExplorerApi:
"show_entry",
"list_topics",
"get_topic",
"export_topic_bibtex",
"bootstrap",
"expand_topic",
"extract_text",
@ -81,6 +82,22 @@ class LiteratureExplorerApi:
"entries": self.store.list_topic_entries(topic_slug, limit=entry_limit),
}
def export_topic_bibtex(self, topic_slug: str, *, include_stubs: bool = False) -> dict[str, object] | None:
topic = self.store.get_topic(topic_slug)
if topic is None:
return None
entries = self.store.list_topic_entries(topic_slug, limit=100000)
citation_keys = [row["citation_key"] for row in entries]
export = self.store.export_bibtex_report(citation_keys, include_stubs=include_stubs)
return {
"topic": topic,
"entry_count": len(citation_keys),
"exported_count": export["exported_count"],
"include_stubs": include_stubs,
"skipped": export["skipped"],
"bibtex": export["bibtex"],
}
def bootstrap(
self,
*,
@ -94,6 +111,12 @@ class LiteratureExplorerApi:
expand: bool = True,
preview_only: bool = False,
review_status: str = "draft",
expansion_mode: str = "legacy",
expansion_rounds: int = 1,
recent_years: int | None = None,
target_recent_entries: int | None = None,
max_expanded_entries: int | None = None,
max_expand_seconds: float | None = None,
) -> dict[str, object]:
results = self.bootstrapper.bootstrap(
self.store,
@ -107,6 +130,12 @@ class LiteratureExplorerApi:
topic_slug=topic_slug,
topic_name=topic_name,
topic_phrase=topic_phrase,
expansion_mode=expansion_mode,
expansion_rounds=expansion_rounds,
recent_years=recent_years,
target_recent_entries=target_recent_entries,
max_expanded_entries=max_expanded_entries,
max_expand_seconds=max_expand_seconds,
)
effective_slug = topic_slug
if effective_slug is None and topic:
@ -114,6 +143,7 @@ class LiteratureExplorerApi:
payload: dict[str, object] = {
"preview": preview_only,
"results": [asdict(result) for result in results],
"run_meta": dict(getattr(self.bootstrapper, "last_run_meta", {}) or {}),
}
if effective_slug is not None:
payload["topic"] = self.store.get_topic(effective_slug)
@ -132,6 +162,9 @@ class LiteratureExplorerApi:
min_relevance: float = 0.2,
seed_keys: list[str] | None = None,
preview_only: bool = False,
max_rounds: int = 1,
recent_years: int | None = None,
target_recent_entries: int | None = None,
) -> dict[str, object] | None:
topic = self.store.get_topic(topic_slug)
if topic is None:
@ -147,12 +180,16 @@ class LiteratureExplorerApi:
min_relevance=min_relevance,
seed_keys=seed_keys,
preview_only=preview_only,
max_rounds=max_rounds,
recent_years=recent_years,
target_recent_entries=target_recent_entries,
)
return {
"topic": self.store.get_topic(topic_slug),
"preview": preview_only,
"results": [asdict(result) for result in results],
"entries": self.store.list_topic_entries(topic_slug, limit=200),
"run_meta": dict(getattr(self.topic_expander, "last_run_meta", {}) or {}),
}
def extract_text(self, text: str, *, backend: str = "heuristic") -> dict[str, object]:

View File

@ -1,9 +1,10 @@
from __future__ import annotations
from http import HTTPStatus
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from http.server import BaseHTTPRequestHandler, HTTPServer
import argparse
import json
import os
from pathlib import Path
from typing import Any
@ -12,8 +13,9 @@ from .storage import BibliographyStore
class LiteratureExplorerAppServer:
def __init__(self, api: LiteratureExplorerApi) -> None:
def __init__(self, api: LiteratureExplorerApi, *, api_token: str | None = None) -> None:
self.api = api
self.api_token = (api_token or "").strip() or None
def dispatch(self, method: str, params: dict[str, Any] | None = None) -> Any:
params = params or {}
@ -42,6 +44,11 @@ class LiteratureExplorerAppServer:
str(params.get("topic_slug") or ""),
entry_limit=int(params.get("entry_limit", 100)),
)
if method == "export_topic_bibtex":
return self.api.export_topic_bibtex(
str(params.get("topic_slug") or ""),
include_stubs=bool(params.get("include_stubs", False)),
)
if method == "bootstrap":
return self.api.bootstrap(
seed_bibtex=_optional_str(params.get("seed_bibtex")),
@ -54,6 +61,12 @@ class LiteratureExplorerAppServer:
expand=bool(params.get("expand", True)),
preview_only=bool(params.get("preview_only", False)),
review_status=str(params.get("review_status") or "draft"),
expansion_mode=str(params.get("expansion_mode") or "legacy"),
expansion_rounds=int(params.get("expansion_rounds", 1)),
recent_years=_optional_int(params.get("recent_years")),
target_recent_entries=_optional_int(params.get("target_recent_entries")),
max_expanded_entries=_optional_int(params.get("max_expanded_entries")),
max_expand_seconds=_optional_float(params.get("max_expand_seconds")),
)
if method == "expand_topic":
return self.api.expand_topic(
@ -66,6 +79,9 @@ class LiteratureExplorerAppServer:
min_relevance=float(params.get("min_relevance", 0.2)),
seed_keys=_string_list(params.get("seed_keys")),
preview_only=bool(params.get("preview_only", False)),
max_rounds=int(params.get("max_rounds", 1)),
recent_years=_optional_int(params.get("recent_years")),
target_recent_entries=_optional_int(params.get("target_recent_entries")),
)
if method == "extract_text":
return self.api.extract_text(
@ -106,6 +122,9 @@ def create_request_handler(server: LiteratureExplorerAppServer):
if self.path != "/call":
self._write_json({"error": "not_found"}, status=HTTPStatus.NOT_FOUND)
return
if not _request_is_authorized(self.headers, server.api_token):
self._write_unauthorized()
return
try:
body = self.rfile.read(int(self.headers.get("Content-Length", "0") or "0"))
payload = json.loads(body.decode("utf-8") or "{}")
@ -125,6 +144,9 @@ def create_request_handler(server: LiteratureExplorerAppServer):
self._write_json({"ok": True})
return
if self.path == "/capabilities":
if not _request_is_authorized(self.headers, server.api_token):
self._write_unauthorized()
return
self._write_json({"ok": True, "result": server.dispatch("capabilities", {})})
return
self._write_json({"error": "not_found"}, status=HTTPStatus.NOT_FOUND)
@ -143,9 +165,19 @@ def create_request_handler(server: LiteratureExplorerAppServer):
def _write_cors_headers(self) -> None:
self.send_header("Access-Control-Allow-Origin", "*")
self.send_header("Access-Control-Allow-Headers", "Content-Type")
self.send_header("Access-Control-Allow-Headers", "Authorization, Content-Type, X-API-Token")
self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
def _write_unauthorized(self) -> None:
    """Emit a 401 JSON response with CORS and a Bearer challenge header."""
    payload = json.dumps({"ok": False, "error": "unauthorized"}, indent=2).encode("utf-8")
    self.send_response(HTTPStatus.UNAUTHORIZED)
    self._write_cors_headers()
    for header_name, header_value in (
        ("WWW-Authenticate", 'Bearer realm="citegeist"'),
        ("Content-Type", "application/json; charset=utf-8"),
        ("Content-Length", str(len(payload))),
    ):
        self.send_header(header_name, header_value)
    self.end_headers()
    self.wfile.write(payload)
return Handler
@ -154,12 +186,14 @@ def main(argv: list[str] | None = None) -> int:
parser.add_argument("--db", default="library.sqlite3", help="SQLite database path")
parser.add_argument("--host", default="127.0.0.1", help="Bind host")
parser.add_argument("--port", type=int, default=8765, help="Bind port")
parser.add_argument("--api-token", default=None, help="Optional bearer token required for API access")
args = parser.parse_args(argv)
store = BibliographyStore(Path(args.db))
api = LiteratureExplorerApi(store)
server = LiteratureExplorerAppServer(api)
httpd = ThreadingHTTPServer((args.host, args.port), create_request_handler(server))
api_token = args.api_token or os.environ.get("CITEGEIST_API_TOKEN")
server = LiteratureExplorerAppServer(api, api_token=api_token)
httpd = HTTPServer((args.host, args.port), create_request_handler(server))
try:
print(f"CiteGeist explorer server listening on http://{args.host}:{args.port}")
httpd.serve_forever()
@ -182,6 +216,12 @@ def _optional_int(value: object) -> int | None:
return int(value)
def _optional_float(value: object) -> float | None:
if value is None or value == "":
return None
return float(value)
def _string_list(value: object) -> list[str]:
if value is None:
return []
@ -190,5 +230,28 @@ def _string_list(value: object) -> list[str]:
return [str(value)] if str(value) else []
def _request_is_authorized(headers: Any, api_token: str | None) -> bool:
    """Return True when no token is configured or the request supplies it.

    Accepts either an ``Authorization: Bearer <token>`` header or the
    ``X-API-Token`` fallback header. Token comparison uses
    ``hmac.compare_digest`` so the check runs in constant time and does
    not leak the configured token through timing differences.
    """
    import hmac  # stdlib; local import keeps the module top-level unchanged

    if not api_token:
        # Auth disabled: every request is allowed.
        return True
    bearer_value = _extract_bearer_token(headers)
    if bearer_value is not None and hmac.compare_digest(bearer_value, api_token):
        return True
    header_token = headers.get("X-API-Token", "").strip() if headers else ""
    return bool(header_token) and hmac.compare_digest(header_token, api_token)
def _extract_bearer_token(headers: Any) -> str | None:
if not headers:
return None
authorization = headers.get("Authorization", "")
if not authorization:
return None
scheme, _, value = authorization.partition(" ")
if scheme.lower() != "bearer":
return None
value = value.strip()
return value or None
if __name__ == "__main__":
raise SystemExit(main())

View File

@ -56,6 +56,12 @@ class BatchBootstrapRunner:
topic_slug = job.get("topic_slug")
topic_name = job.get("topic_name")
topic_phrase = job.get("topic_phrase")
expansion_mode = str(job.get("expansion_mode", "legacy"))
expansion_rounds = int(job.get("expansion_rounds", 1))
recent_years = job.get("recent_years")
target_recent_entries = job.get("target_recent_entries")
max_expanded_entries = job.get("max_expanded_entries")
max_expand_seconds = job.get("max_expand_seconds")
seed_bibtex = None
if seed_bib:
@ -73,6 +79,12 @@ class BatchBootstrapRunner:
topic_slug=str(topic_slug) if topic_slug else None,
topic_name=str(topic_name) if topic_name else None,
topic_phrase=str(topic_phrase) if topic_phrase else None,
expansion_mode=expansion_mode,
expansion_rounds=expansion_rounds,
recent_years=int(recent_years) if recent_years is not None else None,
target_recent_entries=int(target_recent_entries) if target_recent_entries is not None else None,
max_expanded_entries=int(max_expanded_entries) if max_expanded_entries is not None else None,
max_expand_seconds=float(max_expand_seconds) if max_expand_seconds is not None else None,
)
results.append(BatchJobResult(name, len(job_results), job_results))
return results

View File

@ -1,10 +1,19 @@
from __future__ import annotations
from dataclasses import dataclass
import random
import re
import time
from .bibtex import BibEntry, parse_bibtex
from .expand import CrossrefExpander, OpenAlexExpander
from .expand import (
CrossrefExpander,
OpenAlexExpander,
_entry_is_recent,
_expand_relation_types,
_meets_topic_assignment_threshold as _expand_meets_topic_assignment_threshold,
_topic_relevance_score as _expand_topic_relevance_score,
)
from .resolve import MetadataResolver
from .storage import BibliographyStore
@ -31,6 +40,7 @@ class Bootstrapper:
self.resolver = resolver or MetadataResolver()
self.crossref_expander = crossref_expander or CrossrefExpander(self.resolver)
self.openalex_expander = openalex_expander or OpenAlexExpander(self.resolver)
self.last_run_meta: dict[str, object] = {}
def bootstrap(
self,
@ -45,7 +55,25 @@ class Bootstrapper:
topic_slug: str | None = None,
topic_name: str | None = None,
topic_phrase: str | None = None,
expansion_mode: str = "legacy",
expansion_rounds: int = 1,
recent_years: int | None = None,
target_recent_entries: int | None = None,
max_expanded_entries: int | None = None,
max_expand_seconds: float | None = None,
) -> list[BootstrapResult]:
self.last_run_meta = {
"stop_reason": "completed",
"expansion_mode": expansion_mode,
"preview_only": preview_only,
"recent_years": recent_years,
"target_recent_entries": target_recent_entries,
"max_expanded_entries": max_expanded_entries,
"max_expand_seconds": max_expand_seconds,
"recent_hits": 0,
"recent_topic_hits": 0,
"expanded_discoveries": 0,
}
results: list[BootstrapResult] = []
seed_keys: list[str] = []
effective_topic_slug = topic_slug or (_slugify(topic) if topic else None)
@ -140,15 +168,200 @@ class Bootstrapper:
if expand and not preview_only:
expanded_keys = list(dict.fromkeys(seed_keys))
for citation_key in expanded_keys:
for item in self.crossref_expander.expand_entry_references(store, citation_key):
results.append(BootstrapResult(item.discovered_citation_key, "crossref_expand", item.created_entry))
for item in self.openalex_expander.expand_entry(store, citation_key, relation_type="cites", limit=topic_limit):
results.append(BootstrapResult(item.discovered_citation_key, "openalex_expand", item.created_entry))
expanded_discoveries: set[str] = set()
deadline = time.monotonic() + max_expand_seconds if max_expand_seconds is not None else None
if expansion_mode == "legacy":
random.shuffle(expanded_keys)
for citation_key in expanded_keys:
if _deadline_reached(deadline):
store.connection.commit()
return results
for item in self.crossref_expander.expand_entry_references(store, citation_key):
results.append(BootstrapResult(item.discovered_citation_key, "crossref_expand", item.created_entry))
expanded_discoveries.add(item.discovered_citation_key)
if max_expanded_entries is not None and len(expanded_discoveries) >= max_expanded_entries:
self.last_run_meta.update({
"stop_reason": "max_expanded_entries",
"expanded_discoveries": len(expanded_discoveries),
})
store.connection.commit()
return results
if _deadline_reached(deadline):
self.last_run_meta.update({
"stop_reason": "max_expand_seconds",
"expanded_discoveries": len(expanded_discoveries),
})
store.connection.commit()
return results
for item in self.openalex_expander.expand_entry(
store,
citation_key,
relation_type="cites",
limit=topic_limit,
):
results.append(BootstrapResult(item.discovered_citation_key, "openalex_expand", item.created_entry))
expanded_discoveries.add(item.discovered_citation_key)
if max_expanded_entries is not None and len(expanded_discoveries) >= max_expanded_entries:
self.last_run_meta.update({
"stop_reason": "max_expanded_entries",
"expanded_discoveries": len(expanded_discoveries),
})
store.connection.commit()
return results
if _deadline_reached(deadline):
self.last_run_meta.update({
"stop_reason": "max_expand_seconds",
"expanded_discoveries": len(expanded_discoveries),
})
store.connection.commit()
return results
else:
results.extend(
self._bootstrap_openalex_expansion(
store,
expanded_keys,
relation_type=expansion_mode,
limit=topic_limit,
max_rounds=expansion_rounds,
topic_slug=effective_topic_slug,
topic_name=effective_topic_name,
topic_phrase=topic_phrase or topic,
recent_years=recent_years,
target_recent_entries=target_recent_entries,
max_expanded_entries=max_expanded_entries,
deadline=deadline,
)
)
self.last_run_meta.setdefault("stop_reason", "completed")
store.connection.commit()
return results
def _bootstrap_openalex_expansion(
    self,
    store: BibliographyStore,
    seed_keys: list[str],
    relation_type: str,
    limit: int,
    max_rounds: int,
    topic_slug: str | None,
    topic_name: str | None,
    topic_phrase: str | None,
    recent_years: int | None,
    target_recent_entries: int | None,
    max_expanded_entries: int | None,
    deadline: float | None,
) -> list[BootstrapResult]:
    """Recursively expand OpenAlex citation links outward from *seed_keys*.

    Runs up to ``max_rounds`` breadth-first rounds over the citation graph.
    Each round expands every not-yet-seen key in the frontier along the
    relation(s) named by *relation_type* ("cites", "cited_by", or "both"),
    requesting at most *limit* discoveries per seed per relation.

    When *topic_slug*, *topic_name*, and *topic_phrase* are all provided,
    each discovered entry is scored against the phrase and attached to the
    topic when it clears the assignment threshold (min_relevance=0.2).

    Early-termination caps (each records its reason in ``last_run_meta``):
    - ``max_expanded_entries``: hard cap on unique discovered entries.
    - ``target_recent_entries``: stop once this many confidently-relevant
      recent discoveries (score >= 0.5 within *recent_years*) are tagged.
    - ``deadline``: absolute ``time.monotonic()`` cutoff, checked between
      rounds, seeds, relations, and individual discoveries.

    Returns the list of ``BootstrapResult`` records for every discovery made
    before termination; hit counters and the stop reason are always written
    to ``self.last_run_meta``.
    """
    results: list[BootstrapResult] = []
    frontier = list(dict.fromkeys(seed_keys))
    seen_seeds: set[str] = set()
    recent_hits: set[str] = set()
    recent_topic_hits: set[str] = set()
    expanded_discoveries: set[str] = set()

    def _finish(stop_reason: str) -> list[BootstrapResult]:
        # Single exit helper: every termination path records the same counters,
        # so they can never drift apart between the early-return sites.
        self.last_run_meta.update({
            "stop_reason": stop_reason,
            "recent_hits": len(recent_hits),
            "recent_topic_hits": len(recent_topic_hits),
            "expanded_discoveries": len(expanded_discoveries),
        })
        return results

    for _round in range(max(1, max_rounds)):
        if not frontier:
            break
        if _deadline_reached(deadline):
            return _finish("max_expand_seconds")
        next_frontier: list[str] = []
        for citation_key in frontier:
            if citation_key in seen_seeds:
                continue
            seen_seeds.add(citation_key)
            if _deadline_reached(deadline):
                return _finish("max_expand_seconds")
            for relation_name in _expand_relation_types(relation_type):
                if _deadline_reached(deadline):
                    return _finish("max_expand_seconds")
                for item in self.openalex_expander.expand_entry(
                    store,
                    citation_key,
                    relation_type=relation_name,
                    limit=limit,
                ):
                    discovered_key = item.discovered_citation_key
                    entry = store.get_entry(discovered_key)
                    if _entry_is_recent(entry, recent_years):
                        recent_hits.add(discovered_key)
                    if topic_slug and topic_name and topic_phrase and entry is not None:
                        score = _expand_topic_relevance_score(topic_phrase, entry)
                        if _expand_meets_topic_assignment_threshold(
                            topic_phrase,
                            entry,
                            min_relevance=0.2,
                            relevance_score=score,
                        ):
                            store.add_entry_topic(
                                discovered_key,
                                topic_slug=topic_slug,
                                topic_name=topic_name,
                                source_type="bootstrap_expand",
                                source_label=f"openalex:{relation_name}:{citation_key}",
                                confidence=score,
                                expansion_phrase=topic_phrase,
                            )
                            # Only confidently-relevant recent finds count toward the target.
                            if _entry_is_recent(entry, recent_years) and score >= 0.5:
                                recent_topic_hits.add(discovered_key)
                    results.append(BootstrapResult(discovered_key, f"openalex_expand:{relation_name}", item.created_entry))
                    expanded_discoveries.add(discovered_key)
                    if discovered_key not in seen_seeds:
                        next_frontier.append(discovered_key)
                    if max_expanded_entries is not None and len(expanded_discoveries) >= max_expanded_entries:
                        return _finish("max_expanded_entries")
                    if target_recent_entries is not None and len(recent_topic_hits) >= target_recent_entries:
                        return _finish("target_recent_entries")
                    if _deadline_reached(deadline):
                        return _finish("max_expand_seconds")
        frontier = list(dict.fromkeys(next_frontier))
    return _finish("frontier_exhausted")
def _deadline_reached(deadline: float | None) -> bool:
return deadline is not None and time.monotonic() >= deadline
def _topic_candidates(self, topic: str, seed_keys: list[str], limit: int) -> list[tuple[BibEntry, float]]:
scored: dict[str, tuple[BibEntry, float]] = {}

View File

@ -11,7 +11,7 @@ from .batch import BatchBootstrapRunner, load_batch_jobs
from .bibtex import parse_bibtex, render_bibtex
from .bootstrap import Bootstrapper
from .examples.talkorigins import TalkOriginsScraper
from .expand import CrossrefExpander, OpenAlexExpander, TopicExpander
from .expand import CrossrefExpander, OpenAlexExpander, TopicExpander, _expand_relation_types
from .extract import (
available_extraction_backends,
check_extraction_comparison_summary,
@ -202,7 +202,7 @@ def build_parser() -> argparse.ArgumentParser:
)
expand_parser.add_argument(
"--relation",
choices=["cites", "cited_by"],
choices=["cites", "cited_by", "both"],
default="cites",
help="Graph direction to expand for sources that support it",
)
@ -225,12 +225,23 @@ def build_parser() -> argparse.ArgumentParser:
)
expand_topic_parser.add_argument(
"--relation",
choices=["cites", "cited_by"],
choices=["cites", "cited_by", "both"],
default="cites",
help="Graph direction to expand for sources that support it",
)
expand_topic_parser.add_argument("--seed-limit", type=int, default=25, help="Maximum topic seed entries to expand from")
expand_topic_parser.add_argument("--per-seed-limit", type=int, default=25, help="Maximum discovered works to fetch per seed")
expand_topic_parser.add_argument("--rounds", type=int, default=1, help="Maximum recursive expansion rounds")
expand_topic_parser.add_argument(
"--recent-years",
type=int,
help="Treat discoveries within this many years of the current year as recent for termination heuristics",
)
expand_topic_parser.add_argument(
"--target-recent-entries",
type=int,
help="Stop recursive topic expansion once this many recent discoveries have been found",
)
expand_topic_parser.add_argument(
"--seed-key",
action="append",
@ -300,6 +311,38 @@ def build_parser() -> argparse.ArgumentParser:
action="store_true",
help="Do not run immediate graph expansion after seeding",
)
bootstrap_parser.add_argument(
"--expansion-mode",
choices=["legacy", "cites", "cited_by", "both"],
default="legacy",
help="Expansion policy after bootstrap seeding; legacy keeps Crossref refs plus OpenAlex cites",
)
bootstrap_parser.add_argument(
"--expansion-rounds",
type=int,
default=1,
help="Maximum recursive OpenAlex expansion rounds for non-legacy expansion modes",
)
bootstrap_parser.add_argument(
"--recent-years",
type=int,
help="Treat discoveries within this many years of the current year as recent for termination heuristics",
)
bootstrap_parser.add_argument(
"--target-recent-entries",
type=int,
help="Stop non-legacy expansion once this many recent discoveries have been found",
)
bootstrap_parser.add_argument(
"--max-expanded-entries",
type=int,
help="Hard cap on unique discovered entries added during one bootstrap job",
)
bootstrap_parser.add_argument(
"--max-expand-seconds",
type=float,
help="Wall-clock cap for one bootstrap job's expansion phase",
)
bootstrap_parser.add_argument(
"--preview",
action="store_true",
@ -364,6 +407,38 @@ def build_parser() -> argparse.ArgumentParser:
type=int,
help="Default bootstrap topic commit limit to include in generated jobs",
)
talkorigins_parser.add_argument(
"--expansion-mode",
choices=["legacy", "cites", "cited_by", "both"],
default="legacy",
help="Expansion policy to write into generated bootstrap jobs",
)
talkorigins_parser.add_argument(
"--expansion-rounds",
type=int,
default=1,
help="Maximum recursive OpenAlex expansion rounds to write into generated jobs",
)
talkorigins_parser.add_argument(
"--recent-years",
type=int,
help="Optional recent-discovery window to write into generated jobs",
)
talkorigins_parser.add_argument(
"--target-recent-entries",
type=int,
help="Optional recent-discovery target to write into generated jobs",
)
talkorigins_parser.add_argument(
"--max-expanded-entries",
type=int,
help="Optional hard cap on unique discovered entries per generated bootstrap job",
)
talkorigins_parser.add_argument(
"--max-expand-seconds",
type=float,
help="Optional wall-clock cap to write into generated bootstrap jobs",
)
talkorigins_parser.add_argument("--status", default="draft", help="Review status for generated seed jobs")
validate_talkorigins_parser = subparsers.add_parser(
@ -637,6 +712,11 @@ def main(argv: list[str] | None = None) -> int:
args.min_relevance,
args.seed_keys,
args.preview,
args.rounds,
args.recent_years,
args.target_recent_entries,
args.max_expanded_entries,
args.max_expand_seconds,
)
if args.command == "set-topic-phrase":
return _run_set_topic_phrase(store, args.topic_slug, args.phrase, args.clear)
@ -666,6 +746,10 @@ def main(argv: list[str] | None = None) -> int:
args.topic_slug,
args.topic_name,
args.store_topic_phrase,
args.expansion_mode,
args.expansion_rounds,
args.recent_years,
args.target_recent_entries,
)
if args.command == "bootstrap-batch":
return _run_bootstrap_batch(store, Path(args.input))
@ -682,6 +766,12 @@ def main(argv: list[str] | None = None) -> int:
not args.no_resume,
args.topic_limit,
args.topic_commit_limit,
args.expansion_mode,
args.expansion_rounds,
args.recent_years,
args.target_recent_entries,
args.max_expanded_entries,
args.max_expand_seconds,
args.status,
)
if args.command in {"example-talkorigins-validate", "validate-talkorigins"}:
@ -1387,7 +1477,11 @@ def _run_expand(
expand_fn = lambda key: expander.expand_entry_references(store, key)
elif source == "openalex":
expander = OpenAlexExpander()
expand_fn = lambda key: expander.expand_entry(store, key, relation_type=relation, limit=limit)
expand_fn = lambda key: [
item
for relation_name in _expand_relation_types(relation)
for item in expander.expand_entry(store, key, relation_type=relation_name, limit=limit)
]
else:
print(f"Unsupported expansion source: {source}", file=sys.stderr)
return 1
@ -1412,6 +1506,9 @@ def _run_expand_topic(
min_relevance: float,
seed_keys: list[str] | None,
preview: bool,
rounds: int,
recent_years: int | None,
target_recent_entries: int | None,
) -> int:
expander = TopicExpander()
_print_phase(f"Loading topic expansion for {topic_slug}")
@ -1430,6 +1527,9 @@ def _run_expand_topic(
min_relevance=min_relevance,
seed_keys=seed_keys,
preview_only=preview,
max_rounds=rounds,
recent_years=recent_years,
target_recent_entries=target_recent_entries,
)
print(json.dumps([asdict(result) for result in results], indent=2))
return 0
@ -1513,6 +1613,12 @@ def _run_bootstrap(
topic_slug: str | None,
topic_name: str | None,
stored_topic_phrase: str | None,
expansion_mode: str,
expansion_rounds: int,
recent_years: int | None,
target_recent_entries: int | None,
max_expanded_entries: int | None,
max_expand_seconds: float | None,
) -> int:
if not seed_bib and not topic:
print("bootstrap requires --seed-bib, --topic, or both", file=sys.stderr)
@ -1533,6 +1639,12 @@ def _run_bootstrap(
topic_slug=topic_slug,
topic_name=topic_name,
topic_phrase=stored_topic_phrase,
expansion_mode=expansion_mode,
expansion_rounds=expansion_rounds,
recent_years=recent_years,
target_recent_entries=target_recent_entries,
max_expanded_entries=max_expanded_entries,
max_expand_seconds=max_expand_seconds,
)
print(json.dumps([asdict(result) for result in results], indent=2))
return 0
@ -1570,6 +1682,12 @@ def _run_scrape_talkorigins(
resume: bool,
topic_limit: int,
topic_commit_limit: int | None,
expansion_mode: str,
expansion_rounds: int,
recent_years: int | None,
target_recent_entries: int | None,
max_expanded_entries: int | None,
max_expand_seconds: float | None,
review_status: str,
) -> int:
scraper = TalkOriginsScraper()
@ -1586,6 +1704,12 @@ def _run_scrape_talkorigins(
resume=resume,
topic_limit=topic_limit,
topic_commit_limit=topic_commit_limit,
expansion_mode=expansion_mode,
expansion_rounds=expansion_rounds,
recent_years=recent_years,
target_recent_entries=target_recent_entries,
max_expanded_entries=max_expanded_entries,
max_expand_seconds=max_expand_seconds,
)
print(json.dumps(asdict(export), indent=2))
return 0

View File

@ -1,5 +1,6 @@
from __future__ import annotations
from datetime import date
import html
import re
from dataclasses import dataclass
@ -180,6 +181,7 @@ class OpenAlexExpander:
if relation_type == "cites":
source_key = citation_key
target_key = target_key
else:
source_key = target_key
target_key = citation_key
@ -194,8 +196,8 @@ class OpenAlexExpander:
)
results.append(
ExpansionResult(
source_citation_key=source_key,
discovered_citation_key=target_key,
source_citation_key=citation_key,
discovered_citation_key=existing_key or discovered.citation_key,
created_entry=created,
relation_type=relation_type,
source_label=f"openalex:{relation_type}:{openalex_id}",
@ -225,6 +227,7 @@ class TopicExpander:
) -> None:
self.crossref_expander = crossref_expander or CrossrefExpander()
self.openalex_expander = openalex_expander or OpenAlexExpander()
self.last_run_meta: dict[str, object] = {}
def expand_topic(
self,
@ -238,7 +241,21 @@ class TopicExpander:
min_relevance: float = 0.2,
seed_keys: list[str] | None = None,
preview_only: bool = False,
max_rounds: int = 1,
recent_years: int | None = None,
target_recent_entries: int | None = None,
) -> list[TopicExpansionResult]:
self.last_run_meta = {
"stop_reason": "completed",
"preview_only": preview_only,
"relation_type": relation_type,
"source": source,
"max_rounds": max_rounds,
"recent_years": recent_years,
"target_recent_entries": target_recent_entries,
"recent_hits": 0,
"recent_topic_hits": 0,
}
topic = store.get_topic(topic_slug)
if topic is None:
return []
@ -249,59 +266,89 @@ class TopicExpander:
allowed = set(seed_keys)
seeds = [seed for seed in seeds if str(seed["citation_key"]) in allowed]
results: list[TopicExpansionResult] = []
frontier = [str(seed["citation_key"]) for seed in seeds]
seen_seed_keys: set[str] = set()
recent_hits: set[str] = set()
recent_topic_hits: set[str] = set()
for seed in seeds:
seed_key = str(seed["citation_key"])
if preview_only:
discovered_rows = self._preview_discoveries(
store,
seed_key,
source=source,
relation_type=relation_type,
limit=per_seed_limit,
)
else:
discovered_rows = self._materialized_discoveries(
store,
seed_key,
source=source,
relation_type=relation_type,
limit=per_seed_limit,
)
for row, target_entry in discovered_rows:
score = _topic_relevance_score(phrase, target_entry)
meets_threshold = _meets_topic_assignment_threshold(
phrase,
target_entry,
min_relevance=min_relevance,
relevance_score=score,
)
assigned = False
if not preview_only and meets_threshold and target_entry is not None:
assigned = store.add_entry_topic(
row.discovered_citation_key,
topic_slug=topic_slug,
topic_name=str(topic.get("name") or topic_slug),
source_type="topic_expand",
source_url=str(topic.get("source_url") or ""),
source_label=f"{source}:{relation_type}:{seed_key}",
confidence=score,
for _round in range(max(1, max_rounds)):
if not frontier:
break
next_frontier: list[str] = []
for seed_key in frontier:
if seed_key in seen_seed_keys:
continue
seen_seed_keys.add(seed_key)
if preview_only:
discovered_rows = self._preview_discoveries(
store,
seed_key,
source=source,
relation_type=relation_type,
limit=per_seed_limit,
)
results.append(
TopicExpansionResult(
topic_slug=topic_slug,
source_citation_key=row.source_citation_key,
discovered_citation_key=row.discovered_citation_key,
discovered_title=str(target_entry.get("title") or ""),
created_entry=row.created_entry,
relation_type=row.relation_type,
source_label=row.source_label,
else:
discovered_rows = self._materialized_discoveries(
store,
seed_key,
source=source,
relation_type=relation_type,
limit=per_seed_limit,
)
for row, target_entry in discovered_rows:
score = _topic_relevance_score(phrase, target_entry)
meets_threshold = _meets_topic_assignment_threshold(
phrase,
target_entry,
min_relevance=min_relevance,
relevance_score=score,
meets_relevance_threshold=meets_threshold,
assigned_to_topic=assigned,
)
)
assigned = False
if not preview_only and meets_threshold and target_entry is not None:
assigned = store.add_entry_topic(
row.discovered_citation_key,
topic_slug=topic_slug,
topic_name=str(topic.get("name") or topic_slug),
source_type="topic_expand",
source_url=str(topic.get("source_url") or ""),
source_label=f"{source}:{row.relation_type}:{seed_key}",
confidence=score,
)
if assigned and _entry_is_recent(target_entry, recent_years) and score >= 0.5:
recent_topic_hits.add(row.discovered_citation_key)
if _entry_is_recent(target_entry, recent_years):
recent_hits.add(row.discovered_citation_key)
if row.discovered_citation_key not in seen_seed_keys:
next_frontier.append(row.discovered_citation_key)
results.append(
TopicExpansionResult(
topic_slug=topic_slug,
source_citation_key=row.source_citation_key,
discovered_citation_key=row.discovered_citation_key,
discovered_title=str(target_entry.get("title") or ""),
created_entry=row.created_entry,
relation_type=row.relation_type,
source_label=row.source_label,
relevance_score=score,
meets_relevance_threshold=meets_threshold,
assigned_to_topic=assigned,
)
)
if target_recent_entries is not None and len(recent_topic_hits) >= target_recent_entries:
self.last_run_meta.update({
"stop_reason": "target_recent_entries",
"recent_hits": len(recent_hits),
"recent_topic_hits": len(recent_topic_hits),
})
store.connection.commit()
return results
frontier = list(dict.fromkeys(next_frontier))
self.last_run_meta.update({
"stop_reason": "frontier_exhausted",
"recent_hits": len(recent_hits),
"recent_topic_hits": len(recent_topic_hits),
})
store.connection.commit()
return results
@ -316,12 +363,16 @@ class TopicExpander:
if source == "crossref":
expansion_rows = self.crossref_expander.expand_entry_references(store, citation_key)
else:
expansion_rows = self.openalex_expander.expand_entry(
store,
citation_key,
relation_type=relation_type,
limit=limit,
)
expansion_rows: list[ExpansionResult] = []
for relation_name in _expand_relation_types(relation_type):
expansion_rows.extend(
self.openalex_expander.expand_entry(
store,
citation_key,
relation_type=relation_name,
limit=limit,
)
)
return [(row, store.get_entry(row.discovered_citation_key)) for row in expansion_rows]
def _preview_discoveries(
@ -334,7 +385,10 @@ class TopicExpander:
) -> list[tuple[ExpansionResult, dict[str, object]]]:
if source == "crossref":
return self._preview_crossref_discoveries(store, citation_key, limit)
return self._preview_openalex_discoveries(store, citation_key, relation_type, limit)
rows: list[tuple[ExpansionResult, dict[str, object]]] = []
for relation_name in _expand_relation_types(relation_type):
rows.extend(self._preview_openalex_discoveries(store, citation_key, relation_name, limit))
return rows
def _preview_crossref_discoveries(
self,
@ -399,11 +453,10 @@ class TopicExpander:
if existing_key is None and _skip_openalex_review_like_duplicate(store, discovered):
continue
target_key = existing_key or discovered.citation_key
source_key = citation_key if relation_type == "cites" else target_key
rows.append(
(
ExpansionResult(
source_citation_key=source_key,
source_citation_key=citation_key,
discovered_citation_key=target_key,
created_entry=existing_key is None and store.get_entry(discovered.citation_key) is None,
relation_type=relation_type,
@ -441,6 +494,21 @@ def _crossref_reference_to_entry(reference: dict, source_citation_key: str, ordi
return BibEntry(entry_type=entry_type, citation_key=citation_key, fields=fields)
def _expand_relation_types(relation_type: str) -> list[str]:
if relation_type == "both":
return ["cites", "cited_by"]
return [relation_type]
def _entry_is_recent(entry: dict[str, object] | None, recent_years: int | None) -> bool:
if entry is None or recent_years is None or recent_years < 0:
return False
year_value = str(entry.get("year") or "").strip()
if not year_value.isdigit():
return False
return int(year_value) >= date.today().year - recent_years
def _crossref_reference_title(reference: dict, ordinal: int) -> str:
raw_title = (
reference.get("article-title")

View File

@ -1049,6 +1049,13 @@ class BibliographyStore:
return render_bibtex([entry])
def export_bibtex(self, citation_keys: list[str] | None = None, include_stubs: bool | None = None) -> str:
return self.export_bibtex_report(citation_keys, include_stubs=include_stubs)["bibtex"]
def export_bibtex_report(
self,
citation_keys: list[str] | None = None,
include_stubs: bool | None = None,
) -> dict[str, object]:
explicit_keys = citation_keys is not None
if include_stubs is None:
include_stubs = explicit_keys
@ -1058,7 +1065,6 @@ class BibliographyStore:
).fetchall()
citation_keys = [str(row["citation_key"]) for row in rows]
chunks: list[str] = []
entries: list[BibEntry] = []
for citation_key in citation_keys:
entry = self._load_bib_entry(citation_key)
@ -1066,9 +1072,27 @@ class BibliographyStore:
if not include_stubs and self._is_export_stub(entry):
continue
entries.append(entry)
if not entries:
return ""
return render_bibtex(entries)
chunks: list[str] = []
skipped: list[dict[str, str]] = []
for entry in entries:
try:
rendered = render_bibtex([entry]).strip()
except Exception as exc:
skipped.append(
{
"citation_key": entry.citation_key,
"error": str(exc),
}
)
continue
if rendered:
chunks.append(rendered)
return {
"bibtex": "\n\n".join(chunks).strip(),
"requested_count": len(entries),
"exported_count": len(chunks),
"skipped": skipped,
}
def _detect_fts5(self) -> bool:
try:

View File

@ -189,6 +189,12 @@ class TalkOriginsScraper:
expand: bool = False,
topic_limit: int = 5,
topic_commit_limit: int | None = None,
expansion_mode: str = "legacy",
expansion_rounds: int = 1,
recent_years: int | None = None,
target_recent_entries: int | None = None,
max_expanded_entries: int | None = None,
max_expand_seconds: float | None = None,
resume: bool = True,
) -> TalkOriginsBatchExport:
output_root = Path(output_dir)
@ -286,6 +292,12 @@ class TalkOriginsScraper:
"status": review_status,
"topic_limit": topic_limit,
"topic_commit_limit": topic_commit_limit,
"expansion_mode": expansion_mode,
"expansion_rounds": expansion_rounds,
"recent_years": recent_years,
"target_recent_entries": target_recent_entries,
"max_expanded_entries": max_expanded_entries,
"max_expand_seconds": max_expand_seconds,
}
)

View File

@ -6,7 +6,11 @@ from citegeist.expand import ExpansionResult
class FakeBootstrapper:
def __init__(self) -> None:
self.calls: list[dict] = []
def bootstrap(self, store, **kwargs):
self.calls.append(dict(kwargs))
if not kwargs.get("preview_only"):
store.ensure_topic("graph-topic", "Graph Topic", source_type="bootstrap", expansion_phrase="graph topic")
store.upsert_entry(
@ -40,7 +44,11 @@ class FakeBootstrapper:
class FakeTopicExpander:
def __init__(self) -> None:
self.calls: list[dict] = []
def expand_topic(self, store, topic_slug, **kwargs):
self.calls.append({"topic_slug": topic_slug, **kwargs})
preview_only = kwargs.get("preview_only", False)
if not preview_only:
store.upsert_entry(
@ -102,18 +110,99 @@ def test_literature_explorer_api_search_and_show_entry():
def test_literature_explorer_api_bootstrap_returns_topic_payload():
    """Bootstrap should surface the topic payload and forward every new control."""
    store = BibliographyStore()
    try:
        fake = FakeBootstrapper()
        api = LiteratureExplorerApi(store, bootstrapper=fake)
        response = api.bootstrap(
            topic="graph topic",
            topic_slug="graph-topic",
            topic_name="Graph Topic",
            preview_only=False,
            expand=False,
            expansion_mode="both",
            expansion_rounds=3,
            recent_years=5,
            target_recent_entries=10,
            max_expanded_entries=120,
            max_expand_seconds=18.5,
        )
        assert response["topic"]["slug"] == "graph-topic"
        assert response["entries"][0]["citation_key"] == "topic2024graph"
        assert response["results"][0]["citation_key"] == "topic2024graph"
        # The API must pass every expansion knob through to the bootstrapper verbatim.
        forwarded = fake.calls[0]
        expected_forwarding = {
            "expansion_mode": "both",
            "expansion_rounds": 3,
            "recent_years": 5,
            "target_recent_entries": 10,
            "max_expanded_entries": 120,
            "max_expand_seconds": 18.5,
        }
        for name, value in expected_forwarding.items():
            assert forwarded[name] == value
    finally:
        store.close()
def test_literature_explorer_api_exports_topic_bibtex():
    """A topic with one attached entry should export as a BibTeX payload."""
    store = BibliographyStore()
    try:
        store.ingest_bibtex(
            """
@article{seed2024,
author = {Seed, Alice},
title = {Graph Seed},
year = {2024}
}
"""
        )
        store.add_entry_topic("seed2024", topic_slug="graph-topic", topic_name="Graph Topic", source_label="seed")
        report = LiteratureExplorerApi(store).export_topic_bibtex("graph-topic")
        assert report is not None
        assert report["topic"]["slug"] == "graph-topic"
        assert report["entry_count"] == 1
        assert "@article{seed2024," in report["bibtex"]
    finally:
        store.close()
def test_literature_explorer_api_topic_export_skips_malformed_creator_entries():
    """Entries whose creators break BibTeX rendering are reported as skipped, not exported."""
    store = BibliographyStore()
    try:
        store.ingest_bibtex(
            """
@article{good2024,
author = {Seed, Alice},
title = {Usable Entry},
year = {2024}
}
@article{bad2024,
author = {Normal, Person},
title = {Broken Entry},
year = {2024}
}
"""
        )
        for key in ("good2024", "bad2024"):
            store.add_entry_topic(key, topic_slug="graph-topic", topic_name="Graph Topic", source_label="seed")
        # Corrupt one creator row so rendering that entry raises during export.
        store.connection.execute(
            """
UPDATE creators
SET full_name = 'Franck, Jean-Louis, Georges, MALIGE'
WHERE full_name = 'Normal, Person'
"""
        )
        store.connection.commit()
        report = LiteratureExplorerApi(store).export_topic_bibtex("graph-topic")
        assert report is not None
        assert report["entry_count"] == 2
        assert report["exported_count"] == 1
        assert "@article{good2024," in report["bibtex"]
        assert "@article{bad2024," not in report["bibtex"]
        skipped = report["skipped"][0]
        assert skipped["citation_key"] == "bad2024"
        assert "Too many commas" in skipped["error"]
    finally:
        store.close()
@ -131,13 +220,25 @@ def test_literature_explorer_api_expand_topic_returns_updated_entries():
"""
)
store.add_entry_topic("seed2024", topic_slug="graph-topic", topic_name="Graph Topic", source_label="seed")
api = LiteratureExplorerApi(store, topic_expander=FakeTopicExpander())
topic_expander = FakeTopicExpander()
api = LiteratureExplorerApi(store, topic_expander=topic_expander)
payload = api.expand_topic("graph-topic", preview_only=False)
payload = api.expand_topic(
"graph-topic",
preview_only=False,
relation_type="both",
max_rounds=3,
recent_years=5,
target_recent_entries=10,
)
assert payload is not None
assert payload["results"][0]["discovered_citation_key"] == "discovered2025graph"
assert any(item["citation_key"] == "discovered2025graph" for item in payload["entries"])
assert topic_expander.calls[0]["relation_type"] == "both"
assert topic_expander.calls[0]["max_rounds"] == 3
assert topic_expander.calls[0]["recent_years"] == 5
assert topic_expander.calls[0]["target_recent_entries"] == 10
finally:
store.close()

View File

@ -1,6 +1,32 @@
from types import SimpleNamespace
from citegeist import BibliographyStore
from citegeist.app_api import LiteratureExplorerApi
from citegeist.app_server import LiteratureExplorerAppServer, create_request_handler
from citegeist.bootstrap import BootstrapResult
from citegeist.app_server import (
LiteratureExplorerAppServer,
_extract_bearer_token,
_request_is_authorized,
create_request_handler,
)
class FakeBootstrapper:
    """Test double that records bootstrap() kwargs and returns one canned result."""

    def __init__(self) -> None:
        # Every kwargs dict handed to bootstrap(), in call order.
        self.calls: list[dict] = []

    def bootstrap(self, store, **kwargs):
        """Record the forwarded keyword arguments and return a fixed discovery."""
        self.calls.append({**kwargs})
        canned = BootstrapResult(
            citation_key="graph2026topic",
            origin="topic",
            created=True,
            score=4.0,
            title="Graph Topic Result",
            year="2026",
        )
        return [canned]
def test_literature_explorer_app_server_dispatch_search():
@ -24,6 +50,94 @@ def test_literature_explorer_app_server_dispatch_search():
store.close()
def test_literature_explorer_app_server_dispatch_exports_topic_bibtex():
    """dispatch('export_topic_bibtex') should return the rendered topic export."""
    store = BibliographyStore()
    try:
        store.ingest_bibtex(
            """
@article{seed2024,
author = {Seed, Alice},
title = {Graph Topic Result},
year = {2024}
}
"""
        )
        store.add_entry_topic("seed2024", topic_slug="graph-topic", topic_name="Graph Topic", source_label="seed")
        app = LiteratureExplorerAppServer(LiteratureExplorerApi(store))
        exported = app.dispatch("export_topic_bibtex", {"topic_slug": "graph-topic"})
        assert exported["entry_count"] == 1
        assert "@article{seed2024," in exported["bibtex"]
    finally:
        store.close()
def test_literature_explorer_app_server_dispatch_expand_topic_with_new_controls():
    """expand_topic dispatch should accept the newer traversal/termination knobs."""
    store = BibliographyStore()
    try:
        store.ingest_bibtex(
            """
@article{seed2024,
author = {Seed, Alice},
title = {Graph Topic Result},
year = {2024}
}
"""
        )
        store.add_entry_topic("seed2024", topic_slug="graph-topic", topic_name="Graph Topic", source_label="seed")
        app = LiteratureExplorerAppServer(LiteratureExplorerApi(store))
        request = {
            "topic_slug": "graph-topic",
            "relation_type": "both",
            "max_rounds": 3,
            "recent_years": 5,
            "target_recent_entries": 10,
            "preview_only": True,
        }
        payload = app.dispatch("expand_topic", request)
        assert payload["preview"] is True
        assert "results" in payload
    finally:
        store.close()
def test_literature_explorer_app_server_dispatch_bootstrap_with_new_caps():
    """bootstrap dispatch should forward the expansion caps to the bootstrapper."""
    store = BibliographyStore()
    try:
        fake = FakeBootstrapper()
        app = LiteratureExplorerAppServer(LiteratureExplorerApi(store, bootstrapper=fake))
        request = {
            "topic": "graph topic",
            "topic_slug": "graph-topic",
            "topic_name": "Graph Topic",
            "preview_only": True,
            "expand": False,
            "expansion_mode": "cites",
            "expansion_rounds": 2,
            "recent_years": 5,
            "target_recent_entries": 4,
            "max_expanded_entries": 75,
            "max_expand_seconds": 12.5,
        }
        payload = app.dispatch("bootstrap", request)
        assert payload["preview"] is True
        assert "results" in payload
        forwarded = fake.calls[0]
        assert forwarded["expansion_mode"] == "cites"
        assert forwarded["max_expanded_entries"] == 75
        assert forwarded["max_expand_seconds"] == 12.5
    finally:
        store.close()
def test_literature_explorer_http_handler_class_can_be_created():
store = BibliographyStore()
try:
@ -43,3 +157,29 @@ def test_literature_explorer_http_handler_class_can_be_created():
assert issubclass(handler, object)
finally:
store.close()
def test_request_authorization_accepts_bearer_and_header_token():
    """Both the Authorization bearer and the X-API-Token headers should authenticate."""
    header_values = {
        "Authorization": "Bearer secret-token",
        "X-API-Token": "secret-token",
    }
    headers = SimpleNamespace(get=lambda key, default="": header_values.get(key, default))
    assert _extract_bearer_token(headers) == "secret-token"
    assert _request_is_authorized(headers, "secret-token") is True
def test_request_authorization_rejects_missing_or_wrong_token():
    """Missing or mismatched tokens fail; a server without a token allows everyone."""
    absent = SimpleNamespace(get=lambda key, default="": default)
    mismatched_values = {
        "Authorization": "Bearer wrong-token",
        "X-API-Token": "",
    }
    mismatched = SimpleNamespace(get=lambda key, default="": mismatched_values.get(key, default))
    assert _request_is_authorized(absent, "secret-token") is False
    assert _request_is_authorized(mismatched, "secret-token") is False
    # No configured token means authorization is disabled entirely.
    assert _request_is_authorized(absent, None) is True

View File

@ -127,3 +127,93 @@ def test_bootstrap_batch_cli_runs_json_jobs(tmp_path: Path):
exit_code = main(["--db", str(database), "bootstrap-batch", str(batch_json)])
assert exit_code == 0
def test_batch_runner_passes_new_expansion_settings(tmp_path: Path):
    """run() must forward the new expansion knobs straight to the bootstrapper."""
    jobs = [
        {
            "name": "topic-job",
            "topic": "graph topic",
            "expand": True,
            "expansion_mode": "both",
            "expansion_rounds": 3,
            "recent_years": 5,
            "target_recent_entries": 12,
        }
    ]
    runner = BatchBootstrapRunner()
    store = BibliographyStore()
    try:
        seen_kwargs: dict[str, object] = {}

        def record_bootstrap(_store, **kwargs):
            seen_kwargs.update(kwargs)
            return []

        runner.bootstrapper.bootstrap = record_bootstrap  # type: ignore[method-assign]
        runner.run(store, jobs)
        assert seen_kwargs["expansion_mode"] == "both"
        assert seen_kwargs["expansion_rounds"] == 3
        assert seen_kwargs["recent_years"] == 5
        assert seen_kwargs["target_recent_entries"] == 12
        # Unset caps are still forwarded explicitly as None.
        assert seen_kwargs["max_expanded_entries"] is None
    finally:
        store.close()
def test_batch_runner_passes_max_expanded_entries(tmp_path: Path):
    """run() should forward a per-job max_expanded_entries cap verbatim."""
    jobs = [
        {
            "name": "topic-job",
            "topic": "graph topic",
            "expand": True,
            "expansion_mode": "cites",
            "max_expanded_entries": 25,
        }
    ]
    runner = BatchBootstrapRunner()
    store = BibliographyStore()
    try:
        seen_kwargs: dict[str, object] = {}

        def record_bootstrap(_store, **kwargs):
            seen_kwargs.update(kwargs)
            return []

        runner.bootstrapper.bootstrap = record_bootstrap  # type: ignore[method-assign]
        runner.run(store, jobs)
        assert seen_kwargs["max_expanded_entries"] == 25
    finally:
        store.close()
def test_batch_runner_passes_max_expand_seconds():
    """A job's max_expand_seconds time budget is forwarded verbatim to bootstrap().

    Fix: dropped the unused ``tmp_path`` fixture parameter — no filesystem
    access happens in this test.
    """
    jobs = [
        {
            "name": "topic-job",
            "topic": "graph topic",
            "expand": True,
            "expansion_mode": "legacy",
            "max_expand_seconds": 12.5,
        }
    ]
    runner = BatchBootstrapRunner()
    store = BibliographyStore()
    try:
        captured: dict[str, object] = {}

        def fake_bootstrap(_store, **kwargs):
            # Record the keyword arguments; skip the real (network-backed) bootstrap.
            captured.update(kwargs)
            return []

        runner.bootstrapper.bootstrap = fake_bootstrap  # type: ignore[method-assign]
        runner.run(store, jobs)
        assert captured["max_expand_seconds"] == 12.5
    finally:
        store.close()

View File

@ -1,6 +1,7 @@
from citegeist import BibliographyStore
from citegeist.bootstrap import Bootstrapper
from citegeist.cli import main
from citegeist.expand import ExpansionResult
def test_bootstrap_from_seed_bib_only():
@ -299,6 +300,169 @@ def test_bootstrap_topic_commit_requires_title_anchor():
store.close()
def test_bootstrap_nonlegacy_both_mode_expands_both_relations():
    """In "both" mode each seed is expanded along cites and then cited_by."""
    store = BibliographyStore()
    try:
        bootstrapper = Bootstrapper()
        observed: list[tuple[str, str, int]] = []

        def record_expand(_store, key, relation_type="cites", limit=5):
            # Capture the call instead of hitting OpenAlex; discover nothing.
            observed.append((key, relation_type, limit))
            return []

        bootstrapper.openalex_expander.expand_entry = record_expand  # type: ignore[method-assign]
        bootstrapper.bootstrap(
            store,
            seed_bibtex="""
@article{seed2024,
  author = {Seed, Alice},
  title = {Seed Paper},
  year = {2024}
}
""",
            expansion_mode="both",
            expand=True,
        )
        assert observed == [("seed2024", "cites", 5), ("seed2024", "cited_by", 5)]
    finally:
        store.close()
def test_bootstrap_recent_target_stops_recursive_openalex_expansion():
    """Recursive expansion ends as soon as the recent-entry target is satisfied."""
    store = BibliographyStore()
    try:
        bootstrapper = Bootstrapper()
        from citegeist import BibEntry

        # Pre-load the entry the fake expander will "discover"; its year (2026)
        # makes it count toward the recent target.
        store.upsert_entry(
            BibEntry(entry_type="article", citation_key="recent2026", fields={"title": "Recent discovery", "year": "2026"}),
            source_type="graph_expand",
            source_label="test",
            review_status="draft",
        )
        store.connection.commit()

        def scripted_expand(_store, key, relation_type="cites", limit=5):
            # Only the seed yields a discovery; everything else dead-ends.
            if key != "seed2024":
                return []
            return [
                ExpansionResult(
                    "seed2024",
                    "recent2026",
                    False,
                    relation_type,
                    f"openalex:{relation_type}:seed2024",
                )
            ]

        bootstrapper.openalex_expander.expand_entry = scripted_expand  # type: ignore[method-assign]
        results = bootstrapper.bootstrap(
            store,
            seed_bibtex="""
@article{seed2024,
  author = {Seed, Alice},
  title = {Seed Paper},
  year = {2024}
}
""",
            expansion_mode="cites",
            expansion_rounds=3,
            recent_years=2,
            target_recent_entries=1,
            expand=True,
        )
        assert results[-1].origin == "openalex_expand:cites"
        expanded_keys = [item.citation_key for item in results if item.origin.startswith("openalex_expand")]
        # Target of one recent entry reached after the first discovery — no more rounds.
        assert expanded_keys == ["recent2026"]
    finally:
        store.close()
def test_bootstrap_max_expanded_entries_caps_growth():
    """max_expanded_entries truncates how many discoveries a run may keep."""
    store = BibliographyStore()
    try:
        bootstrapper = Bootstrapper()
        from citegeist import BibEntry

        # Pre-load both candidate discoveries as draft entries.
        for key, title in (("d1", "Discovery One"), ("d2", "Discovery Two")):
            store.upsert_entry(
                BibEntry(entry_type="article", citation_key=key, fields={"title": title, "year": "2024"}),
                source_type="graph_expand",
                source_label="test",
                review_status="draft",
            )
        store.connection.commit()

        def scripted_expand(_store, key, relation_type="cites", limit=5):
            # The seed yields two discoveries; anything else yields none.
            if key != "seed2024":
                return []
            origin = f"openalex:{relation_type}:{key}"
            return [
                ExpansionResult(key, "d1", False, relation_type, origin),
                ExpansionResult(key, "d2", False, relation_type, origin),
            ]

        bootstrapper.openalex_expander.expand_entry = scripted_expand  # type: ignore[method-assign]
        results = bootstrapper.bootstrap(
            store,
            seed_bibtex="""
@article{seed2024,
  author = {Seed, Alice},
  title = {Seed Paper},
  year = {2024}
}
""",
            expansion_mode="cites",
            expand=True,
            max_expanded_entries=1,
        )
        expanded_keys = [item.citation_key for item in results if item.origin.startswith("openalex_expand")]
        # Cap of 1 keeps only the first discovery.
        assert expanded_keys == ["d1"]
    finally:
        store.close()
def test_bootstrap_max_expand_seconds_stops_legacy_expansion(monkeypatch):
    """Legacy expansion stops once the monotonic clock exceeds the time budget."""
    store = BibliographyStore()
    try:
        bootstrapper = Bootstrapper()
        # Scripted clock: the third reading (2.0) already blows the 1.0s budget.
        clock_readings = iter([0.0, 0.0, 2.0, 2.0, 2.0])
        monkeypatch.setattr("citegeist.bootstrap.time.monotonic", lambda: next(clock_readings))
        expansion_calls: list[str] = []

        def record_crossref(_store, key):
            expansion_calls.append(f"crossref:{key}")
            return []

        def record_openalex(_store, key, relation_type="cites", limit=5):
            expansion_calls.append(f"openalex:{key}")
            return []

        bootstrapper.crossref_expander.expand_entry_references = record_crossref  # type: ignore[method-assign]
        bootstrapper.openalex_expander.expand_entry = record_openalex  # type: ignore[method-assign]
        bootstrapper.bootstrap(
            store,
            seed_bibtex="""
@article{seed2024,
  author = {Seed, Alice},
  title = {Seed Paper},
  year = {2024}
}
@article{seed2023,
  author = {Seed, Bob},
  title = {Older Seed},
  year = {2023}
}
""",
            expansion_mode="legacy",
            expand=True,
            max_expand_seconds=1.0,
        )
        # Two seeds were available, but the exhausted budget allows at most one
        # seed's crossref+openalex pair before the loop bails out.
        assert len(expansion_calls) <= 2
    finally:
        store.close()
def test_bootstrap_preview_uses_topic_commit_limit_when_larger_than_topic_limit():
store = BibliographyStore()
try:

View File

@ -11,8 +11,10 @@ from citegeist.storage import BibliographyStore
class FakeOpenAlexExpander:
    """Test double that records expand_entry calls and replays canned results."""

    def __init__(self, results: list[ExpansionResult] | dict[str, list[ExpansionResult]]) -> None:
        # Either a single shared result list, or a per-citation-key mapping.
        self.results = results
        # Each call is recorded as (citation_key, relation_type, limit).
        self.calls: list[tuple[str, str, int]] = []

    def expand_entry(self, store, citation_key, relation_type="cites", limit=25):
        self.calls.append((citation_key, relation_type, limit))
        canned = self.results
        if isinstance(canned, dict):
            # Unknown keys yield no discoveries; always hand back a fresh list.
            return list(canned.get(citation_key, []))
        return list(canned)
@ -216,6 +218,101 @@ def test_topic_expander_preview_discovers_without_writing():
store.close()
def test_topic_expander_relation_type_both_uses_both_openalex_directions():
    """relation_type="both" fans out into a cites pass followed by a cited_by pass."""
    store = BibliographyStore()
    try:
        store.ingest_bibtex(
            """
@article{seed2024,
  author = {Seed, Alice},
  title = {Abiogenesis Seed Paper},
  year = {2024}
}
"""
        )
        store.add_entry_topic(
            "seed2024",
            topic_slug="abiogenesis",
            topic_name="Abiogenesis",
            source_type="talkorigins",
            source_url="https://example.org/topics/abiogenesis",
            source_label="seed",
        )
        fake = FakeOpenAlexExpander([])
        TopicExpander(openalex_expander=fake).expand_topic(store, "abiogenesis", relation_type="both")
        relations_used = [relation for _seed, relation, _limit in fake.calls]
        assert relations_used == ["cites", "cited_by"]
    finally:
        store.close()
def test_topic_expander_stops_once_recent_target_is_reached():
    """Expansion halts after round one once the recent-entry target is satisfied."""
    store = BibliographyStore()
    try:
        store.ingest_bibtex(
            """
@article{seed2024,
  author = {Seed, Alice},
  title = {Abiogenesis Seed Paper},
  year = {2024}
}
"""
        )
        store.add_entry_topic(
            "seed2024",
            topic_slug="abiogenesis",
            topic_name="Abiogenesis",
            source_type="talkorigins",
            source_url="https://example.org/topics/abiogenesis",
            source_label="seed",
        )

        def preload_recent(key, title, year):
            # Draft entry whose topical abstract and year let it count as "recent".
            store.upsert_entry(
                BibEntry(
                    entry_type="article",
                    citation_key=key,
                    fields={"title": title, "abstract": "abiogenesis", "year": year},
                ),
                source_type="graph_expand",
                source_label="test",
                review_status="draft",
            )

        preload_recent("recent1", "Abiogenesis pathways", "2026")
        preload_recent("recent2", "Abiogenesis chemistry", "2025")
        store.connection.commit()
        # Each key discovers the next; if the target check failed, round two
        # would expand recent1 as well.
        fake = FakeOpenAlexExpander(
            {
                "seed2024": [ExpansionResult("seed2024", "recent1", False, "cites", "openalex:cites:seed2024")],
                "recent1": [ExpansionResult("recent1", "recent2", False, "cites", "openalex:cites:recent1")],
            }
        )
        results = TopicExpander(openalex_expander=fake).expand_topic(
            store,
            "abiogenesis",
            topic_phrase="abiogenesis chemistry",
            max_rounds=3,
            recent_years=2,
            target_recent_entries=1,
        )
        assert [item.discovered_citation_key for item in results] == ["recent1"]
        # Only the seed was ever expanded — no second round.
        assert fake.calls == [("seed2024", "cites", 25)]
    finally:
        store.close()
def test_topic_relevance_score_expands_human_evolution_terms():
score = _topic_relevance_score(
"human evolution",