# Didactopus/src/didactopus/notebook_page.py
#
# 232 lines
# 8.9 KiB
# Python

from __future__ import annotations
import json
from pathlib import Path
import sys
from typing import Any
# Relation types that _bucket_relation files under "antecedent_concepts" when
# the focus concept is the relation's target, and under "derivative_concepts"
# otherwise.
_ANTECEDENT_TYPES = {"prerequisite", "historical_predecessor"}
# Relation types that _bucket_relation files under "derivative_concepts" when
# the focus concept is the relation's source, and under "antecedent_concepts"
# otherwise.
_DERIVATIVE_TYPES = {"historical_successor"}
def _concept_entry(concept: dict[str, Any], relation_types: set[str] | None = None) -> dict[str, Any]:
    """Build the compact navigation record for a single concept.

    Args:
        concept: Concept mapping; ``concept_id``, ``title`` and
            ``description`` are copied, each defaulting to ``""``.
        relation_types: Optional relation type names.  When non-empty they
            are attached as a sorted list under ``relation_types``.

    Returns:
        A new dict with the three copied fields, plus ``relation_types``
        only when *relation_types* was non-empty.
    """
    summary: dict[str, Any] = {
        field: concept.get(field, "")
        for field in ("concept_id", "title", "description")
    }
    if not relation_types:
        return summary
    summary["relation_types"] = sorted(relation_types)
    return summary
def _bucket_relation(
    relation: dict[str, Any],
    concept_id: str,
    concepts_by_id: dict[str, dict[str, Any]],
) -> tuple[str | None, dict[str, Any] | None]:
    """Classify one relation relative to the focus concept.

    Args:
        relation: Mapping read for ``source_id``, ``target_id`` and
            ``relation_type`` (a blank type is treated as ``"related_to"``).
        concept_id: Identifier of the focus concept.
        concepts_by_id: Known concepts keyed by identifier.

    Returns:
        ``(bucket, entry)`` where *bucket* is ``"antecedent_concepts"``,
        ``"derivative_concepts"`` or ``"closer_concepts"`` and *entry*
        describes the endpoint opposite the focus concept.  ``(None, None)``
        when the relation does not touch *concept_id* or the opposite
        endpoint is missing from *concepts_by_id*.
    """
    src = str(relation.get("source_id", ""))
    dst = str(relation.get("target_id", ""))
    if concept_id not in (src, dst):
        return None, None

    # Endpoint opposite the focus concept; for a self-loop this resolves to
    # the focus concept itself.
    neighbour_id = dst if src == concept_id else src
    neighbour = concepts_by_id.get(neighbour_id)
    if neighbour is None:
        return None, None

    kind = str(relation.get("relation_type", "")).strip() or "related_to"

    # NOTE(review): for relations that are not self-loops, both typed branches
    # below pick the same bucket (focus as target -> antecedent, focus as
    # source -> derivative).  Confirm that this is the intended direction for
    # "historical_successor" against the GroundRecall relation schema.
    if kind in _ANTECEDENT_TYPES:
        focus_is_target = dst == concept_id
        bucket = "antecedent_concepts" if focus_is_target else "derivative_concepts"
    elif kind in _DERIVATIVE_TYPES:
        focus_is_source = src == concept_id
        bucket = "derivative_concepts" if focus_is_source else "antecedent_concepts"
    else:
        bucket = "closer_concepts"
    return bucket, _concept_entry(neighbour, {kind})
def _merge_bucket_entries(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Collapse entries that share a ``concept_id`` into one record each.

    Entries without a ``concept_id`` are dropped.  The first entry seen for
    an id supplies ``title`` and ``description``; ``relation_types`` is the
    sorted union over every entry with that id.

    Args:
        items: Entries to merge.

    Returns:
        One record per distinct id, in order of first appearance.
    """
    by_id: dict[str, dict[str, Any]] = {}
    for entry in items:
        key = str(entry.get("concept_id", ""))
        if key == "":
            continue
        if key not in by_id:
            by_id[key] = {
                "concept_id": key,
                "title": entry.get("title", ""),
                "description": entry.get("description", ""),
                "relation_types": [],
            }
        record = by_id[key]
        combined = set(record["relation_types"])
        combined.update(entry.get("relation_types", []))
        record["relation_types"] = sorted(combined)
    return list(by_id.values())
def _review_context(bundle: dict[str, Any]) -> dict[str, Any]:
    """Summarise the bundle's ``review_candidates`` for the notebook page.

    Args:
        bundle: Query bundle; only ``review_candidates`` is read.

    Returns:
        A dict with the candidate count, the sorted distinct finding codes
        whose text contains ``concept``, ``bridge`` or ``component``, and
        the first three non-blank rationales (whitespace-stripped).
    """
    candidates = bundle.get("review_candidates", []) or []

    keywords = ("concept", "bridge", "component")
    codes = set()
    for candidate in candidates:
        for code in candidate.get("finding_codes", []) or []:
            text = str(code)
            if any(word in text for word in keywords):
                codes.add(code)

    rationales = []
    for candidate in candidates:
        rationale = str(candidate.get("rationale", "")).strip()
        if rationale:
            rationales.append(rationale)

    return {
        "review_candidate_count": len(candidates),
        "graph_codes": sorted(codes),
        "top_rationales": rationales[:3],
    }
def _supporting_sources(bundle: dict[str, Any]) -> list[dict[str, Any]]:
    """List the bundle's source artifacts with their observation counts.

    Observations are tallied by their stripped ``origin_path``; each
    artifact is then matched to that tally through its stripped ``path``.

    Args:
        bundle: Query bundle; ``source_artifacts`` and
            ``supporting_observations`` are read.

    Returns:
        One dict per artifact, in input order, carrying ``artifact_id``,
        ``title``, ``path``, ``artifact_kind`` and
        ``supporting_observation_count``.
    """
    artifacts = bundle.get("source_artifacts", []) or []

    tally: dict[str, int] = {}
    for observation in bundle.get("supporting_observations", []) or []:
        origin = str(observation.get("origin_path", "")).strip()
        if not origin:
            continue
        tally[origin] = tally.get(origin, 0) + 1

    def describe(artifact: dict[str, Any]) -> dict[str, Any]:
        location = str(artifact.get("path", "")).strip()
        return {
            "artifact_id": artifact.get("artifact_id", ""),
            "title": artifact.get("title", ""),
            "path": location,
            "artifact_kind": artifact.get("artifact_kind", ""),
            "supporting_observation_count": tally.get(location, 0),
        }

    return [describe(artifact) for artifact in artifacts]
def _illustration_opportunities(bundle: dict[str, Any], navigation: dict[str, list[dict[str, Any]]]) -> list[dict[str, Any]]:
    """Propose illustrations that the available data could support.

    Args:
        bundle: Query bundle; ``concept`` and ``supporting_observations``
            are read.
        navigation: Mapping with ``antecedent_concepts``,
            ``closer_concepts`` and ``derivative_concepts`` lists.

    Returns:
        Up to three ``"planned"`` opportunities, in this order: a
        ``concept_path`` when any antecedent or derivative exists, a
        ``comparison`` when closer concepts exist (naming the titled ones
        among the first three), and an ``evidence_trace`` when the bundle
        has supporting observations.
    """
    focus = bundle.get("concept", {}) or {}
    # Prefer the title for display, falling back to the identifier.
    label = str(focus.get("title", "")).strip() or str(focus.get("concept_id", "")).strip()

    def planned(kind: str, purpose: str) -> dict[str, Any]:
        return {
            "kind": kind,
            "target_concept_id": focus.get("concept_id", ""),
            "purpose": purpose,
            "status": "planned",
        }

    proposals = []

    if navigation["antecedent_concepts"] or navigation["derivative_concepts"]:
        proposals.append(
            planned(
                "concept_path",
                f"Show how {label} fits into a prerequisite or downstream concept path.",
            )
        )

    nearby = navigation["closer_concepts"]
    if nearby:
        names = [neighbour["title"] for neighbour in nearby[:3] if neighbour.get("title")]
        joined = ", ".join(names)
        if joined:
            purpose = f"Compare {label} with nearby concepts: {joined}."
        else:
            purpose = f"Compare {label} with nearby concepts."
        proposals.append(planned("comparison", purpose))

    if bundle.get("supporting_observations"):
        proposals.append(
            planned(
                "evidence_trace",
                f"Trace the evidence and claims currently grounding {label}.",
            )
        )

    return proposals
def build_notebook_page_from_groundrecall_bundle(bundle: dict[str, Any]) -> dict[str, Any]:
    """Assemble a notebook page dict from a GroundRecall query bundle.

    Args:
        bundle: Query bundle.  Reads ``concept``, ``related_concepts``,
            ``relations``, ``supporting_observations``, ``relevant_claims``
            and ``suggested_next_actions`` here; the helper functions read
            further keys.  Missing or ``None`` values are treated as empty.

    Returns:
        A dict with ``page_kind`` set to ``"didactopus_notebook_page"`` plus
        the concept header, summary counts, graph navigation buckets,
        supporting sources, up to five supporting excerpts, review context,
        illustration opportunities and suggested next actions.
    """
    focus = bundle.get("concept", {}) or {}
    focus_id = str(focus.get("concept_id", "")).strip()
    related = bundle.get("related_concepts", []) or []
    observations = bundle.get("supporting_observations", []) or []

    # Index every concept a relation endpoint may resolve to; the focus
    # concept goes in first, so a related concept with the same id replaces it.
    known: dict[str, dict[str, Any]] = {focus_id: focus}
    for candidate in related:
        candidate_id = str(candidate.get("concept_id", "")).strip()
        if not candidate_id:
            continue
        known[candidate_id] = candidate

    buckets: dict[str, list[dict[str, Any]]] = {
        "antecedent_concepts": [],
        "closer_concepts": [],
        "derivative_concepts": [],
    }
    for relation in bundle.get("relations", []) or []:
        bucket, entry = _bucket_relation(relation, focus_id, known)
        if not (bucket and entry):
            continue
        buckets[bucket].append(entry)
    # Collapse repeated neighbours so each appears once per bucket.
    navigation = {name: _merge_bucket_entries(entries) for name, entries in buckets.items()}

    excerpt_fields = ("observation_id", "text", "origin_path", "grounding_status")
    excerpts = [
        {field: observation.get(field, "") for field in excerpt_fields}
        for observation in observations[:5]
    ]

    header = {
        "concept_id": focus.get("concept_id", ""),
        "title": focus.get("title", ""),
        "description": focus.get("description", ""),
        "aliases": focus.get("aliases", []) or [],
    }
    counts = {
        "claim_count": len(bundle.get("relevant_claims", []) or []),
        "supporting_observation_count": len(observations),
        "related_concept_count": len(related),
    }
    return {
        "page_kind": "didactopus_notebook_page",
        "concept": header,
        "summary": counts,
        "graph_navigation": navigation,
        "supporting_sources": _supporting_sources(bundle),
        "supporting_excerpts": excerpts,
        "review_context": _review_context(bundle),
        "illustration_opportunities": _illustration_opportunities(bundle, navigation),
        "suggested_next_actions": bundle.get("suggested_next_actions", []) or [],
    }
def export_notebook_page_from_groundrecall_bundle(bundle_path: str | Path, out_path: str | Path) -> dict[str, Any]:
    """Read a bundle JSON file, build its notebook page and write it out.

    Args:
        bundle_path: UTF-8 JSON file containing the query bundle.
        out_path: Destination for the page JSON (2-space indented, UTF-8).
            Missing parent directories are created.

    Returns:
        ``{"page_path": <out_path as str>, "page": <page dict>}``.
    """
    with Path(bundle_path).open(encoding="utf-8") as handle:
        bundle = json.load(handle)
    page = build_notebook_page_from_groundrecall_bundle(bundle)

    destination = Path(out_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(page, indent=2)
    with destination.open("w", encoding="utf-8") as handle:
        handle.write(serialized)

    return {"page_path": str(destination), "page": page}
def export_notebook_page_from_groundrecall_store(
    store_dir: str | Path,
    concept_ref: str,
    out_dir: str | Path,
) -> dict[str, Any]:
    """Export a query bundle from a GroundRecall store and build its page.

    The GroundRecall exporter is called with ``(store_dir, concept_ref,
    out_dir)``; its result is indexed by ``"bundle_path"`` (presumably a
    mapping — confirm against GroundRecall).  The page is written to
    ``notebook_page.json`` inside *out_dir*, which is created if missing.

    Args:
        store_dir: Store location, passed through to the exporter.
        concept_ref: Concept reference, passed through to the exporter.
        out_dir: Directory receiving the exporter output and the page.

    Returns:
        The bundle-export result (``page_path``, ``page``) extended with
        ``groundrecall_query_bundle_path`` and ``concept_ref``.
    """
    exporter = _load_groundrecall_export()

    output_dir = Path(out_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    bundle_path = exporter(store_dir, concept_ref, output_dir)["bundle_path"]
    result = export_notebook_page_from_groundrecall_bundle(
        bundle_path,
        output_dir / "notebook_page.json",
    )
    result.update(
        groundrecall_query_bundle_path=bundle_path,
        concept_ref=concept_ref,
    )
    return result
def _load_groundrecall_export(groundrecall_src: str | Path | None = None):
    """Import and return GroundRecall's ``export_groundrecall_query_bundle``.

    If the GroundRecall source directory exists it is put at the front of
    ``sys.path`` before importing, so a local checkout can be used without
    installing the package.  The directory is added at most once: repeated
    calls no longer keep inserting duplicate ``sys.path`` entries.

    Args:
        groundrecall_src: Optional directory containing the ``groundrecall``
            package.  Defaults to ``/home/netuser/bin/GroundRecall/src``.

    Returns:
        The ``export_groundrecall_query_bundle`` callable.

    Raises:
        ImportError: If ``groundrecall.export`` cannot be imported.
    """
    default_src = "/home/netuser/bin/GroundRecall/src"
    checkout = Path(default_src if groundrecall_src is None else groundrecall_src)
    if checkout.exists():
        entry = str(checkout)
        # Guard the insert: this runs on every export call, and an
        # unconditional insert would grow sys.path without bound.
        if entry not in sys.path:
            sys.path.insert(0, entry)
    # Imported lazily so this module still loads when GroundRecall is absent.
    from groundrecall.export import export_groundrecall_query_bundle  # type: ignore

    return export_groundrecall_query_bundle