Support separate bibliography roots in review export
This commit is contained in:
parent
2f7696c115
commit
8201dd83ee
|
|
@ -177,16 +177,27 @@ def _resolve_source_root(import_dir: Path, source_root: str) -> str:
|
||||||
return str((import_dir.parent.parent / root).resolve())
|
return str((import_dir.parent.parent / root).resolve())
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_bibliography_root(import_dir: Path, manifest: dict[str, Any], resolved_source_root: str) -> str:
|
||||||
|
bibliography_root = str(manifest.get("bibliography_root", "")).strip()
|
||||||
|
if not bibliography_root:
|
||||||
|
return resolved_source_root
|
||||||
|
root = Path(bibliography_root)
|
||||||
|
if root.is_absolute():
|
||||||
|
return str(root)
|
||||||
|
return str((import_dir.parent.parent / root).resolve())
|
||||||
|
|
||||||
|
|
||||||
def _artifact_citation_payloads(
|
def _artifact_citation_payloads(
|
||||||
artifacts: list[dict[str, Any]],
|
artifacts: list[dict[str, Any]],
|
||||||
*,
|
*,
|
||||||
source_root: str,
|
source_root: str,
|
||||||
|
bibliography_root: str | None = None,
|
||||||
) -> tuple[list[dict[str, Any]], dict[str, dict[str, Any]]]:
|
) -> tuple[list[dict[str, Any]], dict[str, dict[str, Any]]]:
|
||||||
extract_references, backends = _load_citegeist_extract()
|
extract_references, backends = _load_citegeist_extract()
|
||||||
artifact_payloads: list[dict[str, Any]] = []
|
artifact_payloads: list[dict[str, Any]] = []
|
||||||
summaries: dict[str, dict[str, Any]] = {}
|
summaries: dict[str, dict[str, Any]] = {}
|
||||||
root = Path(source_root) if source_root else None
|
root = Path(source_root) if source_root else None
|
||||||
bibliography_index = load_bibliography_index(source_root) if source_root else {}
|
bibliography_index = load_bibliography_index(bibliography_root or source_root) if (bibliography_root or source_root) else {}
|
||||||
|
|
||||||
for artifact in artifacts:
|
for artifact in artifacts:
|
||||||
path = Path(source_root) / artifact["path"] if root is not None else None
|
path = Path(source_root) / artifact["path"] if root is not None else None
|
||||||
|
|
@ -280,14 +291,16 @@ def build_citation_review_entries_from_import(import_dir: str | Path) -> list[Ci
|
||||||
base = Path(import_dir)
|
base = Path(import_dir)
|
||||||
manifest = _read_json(base / "manifest.json")
|
manifest = _read_json(base / "manifest.json")
|
||||||
resolved_source_root = _resolve_source_root(base, manifest.get("source_root", ""))
|
resolved_source_root = _resolve_source_root(base, manifest.get("source_root", ""))
|
||||||
|
resolved_bibliography_root = _resolve_bibliography_root(base, manifest, resolved_source_root)
|
||||||
artifacts = _read_jsonl(base / "artifacts.jsonl")
|
artifacts = _read_jsonl(base / "artifacts.jsonl")
|
||||||
observations = _read_jsonl(base / "observations.jsonl")
|
observations = _read_jsonl(base / "observations.jsonl")
|
||||||
claims = _read_jsonl(base / "claims.jsonl")
|
claims = _read_jsonl(base / "claims.jsonl")
|
||||||
bibliography_index = load_bibliography_index(resolved_source_root)
|
bibliography_index = load_bibliography_index(resolved_bibliography_root)
|
||||||
|
|
||||||
artifact_payloads, _ = _artifact_citation_payloads(
|
artifact_payloads, _ = _artifact_citation_payloads(
|
||||||
artifacts,
|
artifacts,
|
||||||
source_root=resolved_source_root,
|
source_root=resolved_source_root,
|
||||||
|
bibliography_root=resolved_bibliography_root,
|
||||||
)
|
)
|
||||||
observations_by_id = {item["observation_id"]: item for item in observations}
|
observations_by_id = {item["observation_id"]: item for item in observations}
|
||||||
artifact_claim_links: dict[str, dict[str, set[str]]] = defaultdict(lambda: {"claim_ids": set(), "concept_ids": set()})
|
artifact_claim_links: dict[str, dict[str, set[str]]] = defaultdict(lambda: {"claim_ids": set(), "concept_ids": set()})
|
||||||
|
|
@ -357,7 +370,8 @@ def build_citation_review_entries_from_import(import_dir: str | Path) -> list[Ci
|
||||||
def _build_import_review_payload(session: ReviewSession, import_dir: Path) -> dict[str, Any]:
|
def _build_import_review_payload(session: ReviewSession, import_dir: Path) -> dict[str, Any]:
|
||||||
manifest = _read_json(import_dir / "manifest.json")
|
manifest = _read_json(import_dir / "manifest.json")
|
||||||
resolved_source_root = _resolve_source_root(import_dir, manifest.get("source_root", ""))
|
resolved_source_root = _resolve_source_root(import_dir, manifest.get("source_root", ""))
|
||||||
bibliography_index = load_bibliography_index(resolved_source_root) if resolved_source_root else {}
|
resolved_bibliography_root = _resolve_bibliography_root(import_dir, manifest, resolved_source_root)
|
||||||
|
bibliography_index = load_bibliography_index(resolved_bibliography_root) if resolved_bibliography_root else {}
|
||||||
lint_payload = _read_json(import_dir / "lint_findings.json")
|
lint_payload = _read_json(import_dir / "lint_findings.json")
|
||||||
queue_payload = _read_json(import_dir / "review_queue.json")
|
queue_payload = _read_json(import_dir / "review_queue.json")
|
||||||
graph_payload = _read_json(import_dir / "graph_diagnostics.json")
|
graph_payload = _read_json(import_dir / "graph_diagnostics.json")
|
||||||
|
|
@ -377,6 +391,7 @@ def _build_import_review_payload(session: ReviewSession, import_dir: Path) -> di
|
||||||
artifact_citations, artifact_citation_summary = _artifact_citation_payloads(
|
artifact_citations, artifact_citation_summary = _artifact_citation_payloads(
|
||||||
artifacts,
|
artifacts,
|
||||||
source_root=resolved_source_root,
|
source_root=resolved_source_root,
|
||||||
|
bibliography_root=resolved_bibliography_root,
|
||||||
)
|
)
|
||||||
artifact_by_id = {item["artifact_id"]: item for item in artifacts}
|
artifact_by_id = {item["artifact_id"]: item for item in artifacts}
|
||||||
queue_by_candidate_id = {
|
queue_by_candidate_id = {
|
||||||
|
|
@ -510,7 +525,7 @@ def _build_import_review_payload(session: ReviewSession, import_dir: Path) -> di
|
||||||
],
|
],
|
||||||
"concept_reviews": concept_reviews,
|
"concept_reviews": concept_reviews,
|
||||||
"citation_reviews": [entry.model_dump() for entry in session.citation_reviews],
|
"citation_reviews": [entry.model_dump() for entry in session.citation_reviews],
|
||||||
"bibliography": bibliography_summary_payload(resolved_source_root),
|
"bibliography": bibliography_summary_payload(resolved_bibliography_root),
|
||||||
"graph_diagnostics": graph_payload,
|
"graph_diagnostics": graph_payload,
|
||||||
"citations": {
|
"citations": {
|
||||||
"enabled": True,
|
"enabled": True,
|
||||||
|
|
|
||||||
|
|
@ -139,3 +139,40 @@ def test_review_workspace_surfaces_local_bibliography_support_suggestions(tmp_pa
|
||||||
assert suggestions
|
assert suggestions
|
||||||
assert suggestions[0]["citation_key"] == "kimura1968evolutionary"
|
assert suggestions[0]["citation_key"] == "kimura1968evolutionary"
|
||||||
assert "abstract" in suggestions[0]["reason"].lower() or "title" in suggestions[0]["reason"].lower()
|
assert "abstract" in suggestions[0]["reason"].lower() or "title" in suggestions[0]["reason"].lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_review_workspace_can_use_separate_bibliography_root(tmp_path: Path) -> None:
    """Check that a manifest ``bibliography_root`` outside the source tree is used.

    The ``.bib`` file is written to a sibling directory of the source root, the
    import manifest is patched to point at it, and the review payload is then
    expected to report that bibliography and suggest its single entry.
    """
    pilot_dir = tmp_path / "pilot"
    source_root = pilot_dir / "source"
    bib_root = pilot_dir / "bibliography"
    wiki_dir = source_root / "wiki"
    wiki_dir.mkdir(parents=True)
    bib_root.mkdir(parents=True)

    drift_page = (
        "# Drift\n\n"
        "- Random genetic drift can dominate allele-frequency change in small populations.\n"
    )
    (wiki_dir / "drift.md").write_text(drift_page, encoding="utf-8")

    bib_text = (
        "@article{kimura1968evolutionary,\n"
        " author = {Motoo Kimura},\n"
        " title = {Evolutionary Rate at the Molecular Level},\n"
        " journal = {Nature},\n"
        " year = {1968},\n"
        " abstract = {The rate of molecular evolution is compatible with neutral changes driven by random genetic drift in populations.}\n"
        "}\n"
    )
    (bib_root / "refs.bib").write_text(bib_text, encoding="utf-8")

    import_result = run_groundrecall_import(
        source_root,
        out_root=tmp_path / "imports",
        mode="quick",
        import_id="separate-bib-root",
    )

    # Point the written manifest at the bibliography directory that lives
    # outside the source root.
    manifest_path = import_result.out_dir / "manifest.json"
    manifest_data = json.loads(manifest_path.read_text(encoding="utf-8"))
    manifest_data["bibliography_root"] = str(bib_root)
    manifest_path.write_text(json.dumps(manifest_data, indent=2), encoding="utf-8")

    payload = GroundRecallReviewWorkspace(import_result.out_dir).load_review_data()
    concept_review = next(
        review for review in payload["concept_reviews"] if review["concept_id"] == "drift"
    )
    suggestions = concept_review["top_claims"][0]["support_suggestions"]

    assert payload["bibliography"]["entry_count"] == 1
    assert suggestions
    assert suggestions[0]["citation_key"] == "kimura1968evolutionary"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue