232 lines
8.9 KiB
Python
232 lines
8.9 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
import sys
|
|
from typing import Any
|
|
|
|
|
|
# Relation types whose edge runs from an earlier concept toward the concept
# that builds on it; used by _bucket_relation to pick navigation buckets.
_ANTECEDENT_TYPES = {"prerequisite", "historical_predecessor"}

# Relation types whose edge runs from a concept toward its later successor;
# the complement of _ANTECEDENT_TYPES in _bucket_relation's bucketing.
_DERIVATIVE_TYPES = {"historical_successor"}
|
|
|
|
|
|
def _concept_entry(concept: dict[str, Any], relation_types: set[str] | None = None) -> dict[str, Any]:
|
|
entry = {
|
|
"concept_id": concept.get("concept_id", ""),
|
|
"title": concept.get("title", ""),
|
|
"description": concept.get("description", ""),
|
|
}
|
|
if relation_types:
|
|
entry["relation_types"] = sorted(relation_types)
|
|
return entry
|
|
|
|
|
|
def _bucket_relation(
    relation: dict[str, Any],
    concept_id: str,
    concepts_by_id: dict[str, dict[str, Any]],
) -> tuple[str | None, dict[str, Any] | None]:
    """Classify *relation* relative to *concept_id* into a navigation bucket.

    Returns ``(bucket_name, entry)`` where the bucket is one of
    ``antecedent_concepts`` / ``derivative_concepts`` / ``closer_concepts``,
    or ``(None, None)`` when the relation does not touch the concept or its
    counterpart is not present in *concepts_by_id*.
    """
    src = str(relation.get("source_id", ""))
    dst = str(relation.get("target_id", ""))
    kind = str(relation.get("relation_type", "")).strip() or "related_to"

    # Ignore edges that do not involve the concept at all.
    if concept_id != src and concept_id != dst:
        return None, None

    counterpart_id = dst if src == concept_id else src
    counterpart = concepts_by_id.get(counterpart_id)
    if counterpart is None:
        return None, None

    if kind in _ANTECEDENT_TYPES:
        # Edge points from an antecedent to the dependent concept, so an
        # incoming edge means the counterpart precedes this concept.
        bucket = "antecedent_concepts" if dst == concept_id else "derivative_concepts"
    elif kind in _DERIVATIVE_TYPES:
        # Edge points from a concept to its successor.
        bucket = "derivative_concepts" if src == concept_id else "antecedent_concepts"
    else:
        bucket = "closer_concepts"

    return bucket, _concept_entry(counterpart, {kind})
|
|
|
|
|
|
def _merge_bucket_entries(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
merged: dict[str, dict[str, Any]] = {}
|
|
for item in items:
|
|
concept_id = str(item.get("concept_id", ""))
|
|
if not concept_id:
|
|
continue
|
|
existing = merged.setdefault(
|
|
concept_id,
|
|
{
|
|
"concept_id": concept_id,
|
|
"title": item.get("title", ""),
|
|
"description": item.get("description", ""),
|
|
"relation_types": [],
|
|
},
|
|
)
|
|
existing["relation_types"] = sorted(set(existing["relation_types"]) | set(item.get("relation_types", [])))
|
|
return list(merged.values())
|
|
|
|
|
|
def _review_context(bundle: dict[str, Any]) -> dict[str, Any]:
|
|
review_candidates = bundle.get("review_candidates", []) or []
|
|
graph_codes = sorted(
|
|
{
|
|
code
|
|
for item in review_candidates
|
|
for code in item.get("finding_codes", []) or []
|
|
if "concept" in str(code) or "bridge" in str(code) or "component" in str(code)
|
|
}
|
|
)
|
|
top_rationales = [str(item.get("rationale", "")).strip() for item in review_candidates if str(item.get("rationale", "")).strip()][:3]
|
|
return {
|
|
"review_candidate_count": len(review_candidates),
|
|
"graph_codes": graph_codes,
|
|
"top_rationales": top_rationales,
|
|
}
|
|
|
|
|
|
def _supporting_sources(bundle: dict[str, Any]) -> list[dict[str, Any]]:
|
|
artifacts = bundle.get("source_artifacts", []) or []
|
|
observations = bundle.get("supporting_observations", []) or []
|
|
by_origin: dict[str, int] = {}
|
|
for observation in observations:
|
|
origin_path = str(observation.get("origin_path", "")).strip()
|
|
if origin_path:
|
|
by_origin[origin_path] = by_origin.get(origin_path, 0) + 1
|
|
|
|
sources = []
|
|
for artifact in artifacts:
|
|
path = str(artifact.get("path", "")).strip()
|
|
sources.append(
|
|
{
|
|
"artifact_id": artifact.get("artifact_id", ""),
|
|
"title": artifact.get("title", ""),
|
|
"path": path,
|
|
"artifact_kind": artifact.get("artifact_kind", ""),
|
|
"supporting_observation_count": by_origin.get(path, 0),
|
|
}
|
|
)
|
|
return sources
|
|
|
|
|
|
def _illustration_opportunities(bundle: dict[str, Any], navigation: dict[str, list[dict[str, Any]]]) -> list[dict[str, Any]]:
|
|
concept = bundle.get("concept", {}) or {}
|
|
concept_title = str(concept.get("title", "")).strip() or str(concept.get("concept_id", "")).strip()
|
|
opportunities = []
|
|
if navigation["antecedent_concepts"] or navigation["derivative_concepts"]:
|
|
opportunities.append(
|
|
{
|
|
"kind": "concept_path",
|
|
"target_concept_id": concept.get("concept_id", ""),
|
|
"purpose": f"Show how {concept_title} fits into a prerequisite or downstream concept path.",
|
|
"status": "planned",
|
|
}
|
|
)
|
|
if navigation["closer_concepts"]:
|
|
titles = ", ".join(item["title"] for item in navigation["closer_concepts"][:3] if item.get("title"))
|
|
opportunities.append(
|
|
{
|
|
"kind": "comparison",
|
|
"target_concept_id": concept.get("concept_id", ""),
|
|
"purpose": f"Compare {concept_title} with nearby concepts: {titles}." if titles else f"Compare {concept_title} with nearby concepts.",
|
|
"status": "planned",
|
|
}
|
|
)
|
|
if bundle.get("supporting_observations"):
|
|
opportunities.append(
|
|
{
|
|
"kind": "evidence_trace",
|
|
"target_concept_id": concept.get("concept_id", ""),
|
|
"purpose": f"Trace the evidence and claims currently grounding {concept_title}.",
|
|
"status": "planned",
|
|
}
|
|
)
|
|
return opportunities
|
|
|
|
|
|
def build_notebook_page_from_groundrecall_bundle(bundle: dict[str, Any]) -> dict[str, Any]:
    """Transform a GroundRecall query bundle into a notebook-page dict.

    Assembles concept metadata, graph navigation buckets, supporting sources
    and excerpts, review context, and illustration opportunities into a
    single JSON-serializable page payload.
    """
    concept = bundle.get("concept", {}) or {}
    concept_id = str(concept.get("concept_id", "")).strip()

    # Index every known concept by id so relations can be resolved.
    concepts_by_id: dict[str, dict[str, Any]] = {concept_id: concept}
    for related in bundle.get("related_concepts", []) or []:
        related_id = str(related.get("concept_id", "")).strip()
        if related_id:
            concepts_by_id[related_id] = related

    buckets: dict[str, list[dict[str, Any]]] = {
        "antecedent_concepts": [],
        "closer_concepts": [],
        "derivative_concepts": [],
    }
    for relation in bundle.get("relations", []) or []:
        bucket, entry = _bucket_relation(relation, concept_id, concepts_by_id)
        if bucket and entry:
            buckets[bucket].append(entry)
    # De-duplicate per bucket, unioning relation types per concept.
    navigation = {name: _merge_bucket_entries(entries) for name, entries in buckets.items()}

    observations = bundle.get("supporting_observations", []) or []
    excerpts = []
    for obs in observations[:5]:
        excerpts.append(
            {
                "observation_id": obs.get("observation_id", ""),
                "text": obs.get("text", ""),
                "origin_path": obs.get("origin_path", ""),
                "grounding_status": obs.get("grounding_status", ""),
            }
        )

    return {
        "page_kind": "didactopus_notebook_page",
        "concept": {
            "concept_id": concept.get("concept_id", ""),
            "title": concept.get("title", ""),
            "description": concept.get("description", ""),
            "aliases": concept.get("aliases", []) or [],
        },
        "summary": {
            "claim_count": len(bundle.get("relevant_claims", []) or []),
            "supporting_observation_count": len(observations),
            "related_concept_count": len(bundle.get("related_concepts", []) or []),
        },
        "graph_navigation": navigation,
        "supporting_sources": _supporting_sources(bundle),
        "supporting_excerpts": excerpts,
        "review_context": _review_context(bundle),
        "illustration_opportunities": _illustration_opportunities(bundle, navigation),
        "suggested_next_actions": bundle.get("suggested_next_actions", []) or [],
    }
|
|
|
|
|
|
def export_notebook_page_from_groundrecall_bundle(bundle_path: str | Path, out_path: str | Path) -> dict[str, Any]:
    """Read a bundle JSON file, build a notebook page, and write it out.

    Creates parent directories for *out_path* as needed. Returns a dict with
    the written page path (as a string) and the page payload itself.
    """
    raw = Path(bundle_path).read_text(encoding="utf-8")
    page = build_notebook_page_from_groundrecall_bundle(json.loads(raw))

    destination = Path(out_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(json.dumps(page, indent=2), encoding="utf-8")

    return {"page_path": str(destination), "page": page}
|
|
|
|
|
|
def export_notebook_page_from_groundrecall_store(
    store_dir: str | Path,
    concept_ref: str,
    out_dir: str | Path,
) -> dict[str, Any]:
    """Query a GroundRecall store for *concept_ref* and export a notebook page.

    Exports the query bundle into *out_dir* via GroundRecall, then converts
    it to ``notebook_page.json`` in the same directory. The returned dict is
    the page-export result augmented with the bundle path and concept ref.
    """
    export_bundle = _load_groundrecall_export()

    output_dir = Path(out_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    exported = export_bundle(store_dir, concept_ref, output_dir)

    result = export_notebook_page_from_groundrecall_bundle(
        exported["bundle_path"], output_dir / "notebook_page.json"
    )
    result["groundrecall_query_bundle_path"] = exported["bundle_path"]
    result["concept_ref"] = concept_ref
    return result
|
|
|
|
|
|
def _load_groundrecall_export():
    """Import and return GroundRecall's ``export_groundrecall_query_bundle``.

    GroundRecall may not be installed as a package, so when its known source
    checkout exists it is added to ``sys.path`` before importing.

    Returns:
        The ``export_groundrecall_query_bundle`` callable.

    Raises:
        ImportError: if the ``groundrecall`` package cannot be imported
            (neither installed nor present at the checkout path).
    """
    # NOTE(review): hard-coded machine-specific checkout path — consider an
    # environment variable override.
    groundrecall_src = Path("/home/netuser/bin/GroundRecall/src")
    # Guard against repeated calls accumulating duplicate sys.path entries.
    if groundrecall_src.exists() and str(groundrecall_src) not in sys.path:
        sys.path.insert(0, str(groundrecall_src))
    from groundrecall.export import export_groundrecall_query_bundle  # type: ignore

    return export_groundrecall_query_bundle
|