Add Notebook page builder from GroundRecall bundles
This commit is contained in:
parent
9d972d4144
commit
839590006a
|
|
@ -7,6 +7,7 @@ from pathlib import Path
|
||||||
from .config import load_config
|
from .config import load_config
|
||||||
from .doclift_bundle_demo import run_doclift_bundle_demo
|
from .doclift_bundle_demo import run_doclift_bundle_demo
|
||||||
from .groundrecall_pack_bridge import run_doclift_bundle_with_groundrecall
|
from .groundrecall_pack_bridge import run_doclift_bundle_with_groundrecall
|
||||||
|
from .notebook_page import export_notebook_page_from_groundrecall_bundle
|
||||||
from .review_loader import load_draft_pack
|
from .review_loader import load_draft_pack
|
||||||
from .review_schema import ReviewSession, ReviewAction
|
from .review_schema import ReviewSession, ReviewAction
|
||||||
from .review_actions import apply_action
|
from .review_actions import apply_action
|
||||||
|
|
@ -48,6 +49,13 @@ def build_parser() -> argparse.ArgumentParser:
|
||||||
doclift_gr_parser.add_argument("--course-title", required=True)
|
doclift_gr_parser.add_argument("--course-title", required=True)
|
||||||
doclift_gr_parser.add_argument("--author", default="doclift bundle import")
|
doclift_gr_parser.add_argument("--author", default="doclift bundle import")
|
||||||
doclift_gr_parser.add_argument("--license-name", default="See source bundle metadata")
|
doclift_gr_parser.add_argument("--license-name", default="See source bundle metadata")
|
||||||
|
|
||||||
|
notebook_parser = subparsers.add_parser(
|
||||||
|
"notebook-page",
|
||||||
|
help="Build a Notebook page payload from a GroundRecall query bundle",
|
||||||
|
)
|
||||||
|
notebook_parser.add_argument("groundrecall_query_bundle")
|
||||||
|
notebook_parser.add_argument("output_path")
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -120,4 +128,11 @@ def main() -> None:
|
||||||
)
|
)
|
||||||
print(summary)
|
print(summary)
|
||||||
return
|
return
|
||||||
|
if args.command == "notebook-page":
|
||||||
|
summary = export_notebook_page_from_groundrecall_bundle(
|
||||||
|
args.groundrecall_query_bundle,
|
||||||
|
args.output_path,
|
||||||
|
)
|
||||||
|
print(summary)
|
||||||
|
return
|
||||||
build_parser().print_help()
|
build_parser().print_help()
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,205 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
_ANTECEDENT_TYPES = {"prerequisite", "historical_predecessor"}
|
||||||
|
_DERIVATIVE_TYPES = {"historical_successor"}
|
||||||
|
|
||||||
|
|
||||||
|
def _concept_entry(concept: dict[str, Any], relation_types: set[str] | None = None) -> dict[str, Any]:
|
||||||
|
entry = {
|
||||||
|
"concept_id": concept.get("concept_id", ""),
|
||||||
|
"title": concept.get("title", ""),
|
||||||
|
"description": concept.get("description", ""),
|
||||||
|
}
|
||||||
|
if relation_types:
|
||||||
|
entry["relation_types"] = sorted(relation_types)
|
||||||
|
return entry
|
||||||
|
|
||||||
|
|
||||||
|
def _bucket_relation(
|
||||||
|
relation: dict[str, Any],
|
||||||
|
concept_id: str,
|
||||||
|
concepts_by_id: dict[str, dict[str, Any]],
|
||||||
|
) -> tuple[str | None, dict[str, Any] | None]:
|
||||||
|
source_id = str(relation.get("source_id", ""))
|
||||||
|
target_id = str(relation.get("target_id", ""))
|
||||||
|
relation_type = str(relation.get("relation_type", "")).strip() or "related_to"
|
||||||
|
if concept_id not in {source_id, target_id}:
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
other_id = target_id if source_id == concept_id else source_id
|
||||||
|
other = concepts_by_id.get(other_id)
|
||||||
|
if other is None:
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
if relation_type in _ANTECEDENT_TYPES:
|
||||||
|
bucket = "antecedent_concepts" if target_id == concept_id else "derivative_concepts"
|
||||||
|
elif relation_type in _DERIVATIVE_TYPES:
|
||||||
|
bucket = "derivative_concepts" if source_id == concept_id else "antecedent_concepts"
|
||||||
|
else:
|
||||||
|
bucket = "closer_concepts"
|
||||||
|
|
||||||
|
return bucket, _concept_entry(other, {relation_type})
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_bucket_entries(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||||
|
merged: dict[str, dict[str, Any]] = {}
|
||||||
|
for item in items:
|
||||||
|
concept_id = str(item.get("concept_id", ""))
|
||||||
|
if not concept_id:
|
||||||
|
continue
|
||||||
|
existing = merged.setdefault(
|
||||||
|
concept_id,
|
||||||
|
{
|
||||||
|
"concept_id": concept_id,
|
||||||
|
"title": item.get("title", ""),
|
||||||
|
"description": item.get("description", ""),
|
||||||
|
"relation_types": [],
|
||||||
|
},
|
||||||
|
)
|
||||||
|
existing["relation_types"] = sorted(set(existing["relation_types"]) | set(item.get("relation_types", [])))
|
||||||
|
return list(merged.values())
|
||||||
|
|
||||||
|
|
||||||
|
def _review_context(bundle: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
review_candidates = bundle.get("review_candidates", []) or []
|
||||||
|
graph_codes = sorted(
|
||||||
|
{
|
||||||
|
code
|
||||||
|
for item in review_candidates
|
||||||
|
for code in item.get("finding_codes", []) or []
|
||||||
|
if "concept" in str(code) or "bridge" in str(code) or "component" in str(code)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
top_rationales = [str(item.get("rationale", "")).strip() for item in review_candidates if str(item.get("rationale", "")).strip()][:3]
|
||||||
|
return {
|
||||||
|
"review_candidate_count": len(review_candidates),
|
||||||
|
"graph_codes": graph_codes,
|
||||||
|
"top_rationales": top_rationales,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _supporting_sources(bundle: dict[str, Any]) -> list[dict[str, Any]]:
|
||||||
|
artifacts = bundle.get("source_artifacts", []) or []
|
||||||
|
observations = bundle.get("supporting_observations", []) or []
|
||||||
|
by_origin: dict[str, int] = {}
|
||||||
|
for observation in observations:
|
||||||
|
origin_path = str(observation.get("origin_path", "")).strip()
|
||||||
|
if origin_path:
|
||||||
|
by_origin[origin_path] = by_origin.get(origin_path, 0) + 1
|
||||||
|
|
||||||
|
sources = []
|
||||||
|
for artifact in artifacts:
|
||||||
|
path = str(artifact.get("path", "")).strip()
|
||||||
|
sources.append(
|
||||||
|
{
|
||||||
|
"artifact_id": artifact.get("artifact_id", ""),
|
||||||
|
"title": artifact.get("title", ""),
|
||||||
|
"path": path,
|
||||||
|
"artifact_kind": artifact.get("artifact_kind", ""),
|
||||||
|
"supporting_observation_count": by_origin.get(path, 0),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return sources
|
||||||
|
|
||||||
|
|
||||||
|
def _illustration_opportunities(bundle: dict[str, Any], navigation: dict[str, list[dict[str, Any]]]) -> list[dict[str, Any]]:
|
||||||
|
concept = bundle.get("concept", {}) or {}
|
||||||
|
concept_title = str(concept.get("title", "")).strip() or str(concept.get("concept_id", "")).strip()
|
||||||
|
opportunities = []
|
||||||
|
if navigation["antecedent_concepts"] or navigation["derivative_concepts"]:
|
||||||
|
opportunities.append(
|
||||||
|
{
|
||||||
|
"kind": "concept_path",
|
||||||
|
"target_concept_id": concept.get("concept_id", ""),
|
||||||
|
"purpose": f"Show how {concept_title} fits into a prerequisite or downstream concept path.",
|
||||||
|
"status": "planned",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
if navigation["closer_concepts"]:
|
||||||
|
titles = ", ".join(item["title"] for item in navigation["closer_concepts"][:3] if item.get("title"))
|
||||||
|
opportunities.append(
|
||||||
|
{
|
||||||
|
"kind": "comparison",
|
||||||
|
"target_concept_id": concept.get("concept_id", ""),
|
||||||
|
"purpose": f"Compare {concept_title} with nearby concepts: {titles}." if titles else f"Compare {concept_title} with nearby concepts.",
|
||||||
|
"status": "planned",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
if bundle.get("supporting_observations"):
|
||||||
|
opportunities.append(
|
||||||
|
{
|
||||||
|
"kind": "evidence_trace",
|
||||||
|
"target_concept_id": concept.get("concept_id", ""),
|
||||||
|
"purpose": f"Trace the evidence and claims currently grounding {concept_title}.",
|
||||||
|
"status": "planned",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return opportunities
|
||||||
|
|
||||||
|
|
||||||
|
def build_notebook_page_from_groundrecall_bundle(bundle: dict[str, Any]) -> dict[str, Any]:
    """Assemble a Notebook page payload from a GroundRecall query bundle.

    Combines the bundle's concept, relations, observations, artifacts and
    review candidates into a single JSON-serialisable page dict with
    ``page_kind`` ``"didactopus_notebook_page"``.
    """
    concept = bundle.get("concept", {}) or {}
    concept_id = str(concept.get("concept_id", "")).strip()

    # Index every concept we can resolve relation endpoints against.
    concepts_by_id: dict[str, dict[str, Any]] = {concept_id: concept}
    for related in bundle.get("related_concepts", []) or []:
        related_id = str(related.get("concept_id", "")).strip()
        if related_id:
            concepts_by_id[related_id] = related

    # Bucket each relation around the focal concept, then dedupe per bucket.
    raw_navigation: dict[str, list[dict[str, Any]]] = {
        "antecedent_concepts": [],
        "closer_concepts": [],
        "derivative_concepts": [],
    }
    for relation in bundle.get("relations", []) or []:
        bucket, entry = _bucket_relation(relation, concept_id, concepts_by_id)
        if bucket and entry:
            raw_navigation[bucket].append(entry)
    navigation = {name: _merge_bucket_entries(entries) for name, entries in raw_navigation.items()}

    observations = bundle.get("supporting_observations", []) or []
    excerpt_fields = ("observation_id", "text", "origin_path", "grounding_status")
    supporting_excerpts = [
        {field: item.get(field, "") for field in excerpt_fields}
        for item in observations[:5]  # cap the page at five excerpts
    ]

    return {
        "page_kind": "didactopus_notebook_page",
        "concept": {
            "concept_id": concept.get("concept_id", ""),
            "title": concept.get("title", ""),
            "description": concept.get("description", ""),
            "aliases": concept.get("aliases", []) or [],
        },
        "summary": {
            "claim_count": len(bundle.get("relevant_claims", []) or []),
            "supporting_observation_count": len(observations),
            "related_concept_count": len(bundle.get("related_concepts", []) or []),
        },
        "graph_navigation": navigation,
        "supporting_sources": _supporting_sources(bundle),
        "supporting_excerpts": supporting_excerpts,
        "review_context": _review_context(bundle),
        "illustration_opportunities": _illustration_opportunities(bundle, navigation),
        "suggested_next_actions": bundle.get("suggested_next_actions", []) or [],
    }
|
||||||
|
|
||||||
|
|
||||||
|
def export_notebook_page_from_groundrecall_bundle(bundle_path: str | Path, out_path: str | Path) -> dict[str, Any]:
    """Read a GroundRecall bundle JSON file, build the page, and write it.

    Returns ``{"page_path": <written path as str>, "page": <page dict>}``.
    Raises ``json.JSONDecodeError`` / ``OSError`` if the bundle file is
    malformed or unreadable.
    """
    raw = Path(bundle_path).read_text(encoding="utf-8")
    page = build_notebook_page_from_groundrecall_bundle(json.loads(raw))

    destination = Path(out_path)
    # The output directory may not exist yet.
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(json.dumps(page, indent=2), encoding="utf-8")
    return {"page_path": str(destination), "page": page}
|
||||||
|
|
@ -96,3 +96,31 @@ def test_main_legacy_review_mode_uses_review_parser(monkeypatch, tmp_path: Path)
|
||||||
|
|
||||||
assert called["draft_pack"] == str(tmp_path / "draft")
|
assert called["draft_pack"] == str(tmp_path / "draft")
|
||||||
assert called["output_dir"] == str(tmp_path / "out")
|
assert called["output_dir"] == str(tmp_path / "out")
|
||||||
|
|
||||||
|
|
||||||
|
def test_main_notebook_page_subcommand(monkeypatch, capsys, tmp_path: Path) -> None:
    """The notebook-page subcommand forwards both paths to the exporter and prints its summary."""
    recorded: dict = {}

    def _fake_exporter(bundle_path, out_path):
        # Record the arguments main() forwards; return a minimal summary.
        recorded["bundle_path"] = str(bundle_path)
        recorded["out_path"] = str(out_path)
        return {"page_path": str(out_path)}

    bundle_arg = str(tmp_path / "groundrecall_query_bundle.json")
    output_arg = str(tmp_path / "notebook_page.json")
    monkeypatch.setattr(
        main_module,
        "export_notebook_page_from_groundrecall_bundle",
        _fake_exporter,
    )
    monkeypatch.setattr(main_module.sys, "argv", ["didactopus", "notebook-page", bundle_arg, output_arg])

    main_module.main()

    printed = capsys.readouterr().out
    assert recorded["bundle_path"].endswith("groundrecall_query_bundle.json")
    assert recorded["out_path"].endswith("notebook_page.json")
    assert "page_path" in printed
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,113 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from didactopus.notebook_page import (
|
||||||
|
build_notebook_page_from_groundrecall_bundle,
|
||||||
|
export_notebook_page_from_groundrecall_bundle,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _sample_bundle() -> dict:
|
||||||
|
return {
|
||||||
|
"bundle_kind": "groundrecall_query_bundle",
|
||||||
|
"concept": {
|
||||||
|
"concept_id": "concept::natural-selection",
|
||||||
|
"title": "Natural Selection",
|
||||||
|
"description": "Differential survival and reproduction.",
|
||||||
|
"aliases": ["selection"],
|
||||||
|
},
|
||||||
|
"relevant_claims": [
|
||||||
|
{"claim_id": "clm_001", "claim_text": "Selection can change trait frequencies."},
|
||||||
|
{"claim_id": "clm_002", "claim_text": "Selection depends on heritable variation."},
|
||||||
|
],
|
||||||
|
"relations": [
|
||||||
|
{
|
||||||
|
"relation_id": "rel_001",
|
||||||
|
"source_id": "concept::variation",
|
||||||
|
"target_id": "concept::natural-selection",
|
||||||
|
"relation_type": "prerequisite",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"relation_id": "rel_002",
|
||||||
|
"source_id": "concept::natural-selection",
|
||||||
|
"target_id": "concept::adaptation",
|
||||||
|
"relation_type": "historical_successor",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"relation_id": "rel_003",
|
||||||
|
"source_id": "concept::natural-selection",
|
||||||
|
"target_id": "concept::common-descent",
|
||||||
|
"relation_type": "supports",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
"related_concepts": [
|
||||||
|
{
|
||||||
|
"concept_id": "concept::variation",
|
||||||
|
"title": "Variation",
|
||||||
|
"description": "Differences among individuals.",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"concept_id": "concept::adaptation",
|
||||||
|
"title": "Adaptation",
|
||||||
|
"description": "Traits fit to local conditions.",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"concept_id": "concept::common-descent",
|
||||||
|
"title": "Common Descent",
|
||||||
|
"description": "Shared ancestry of organisms.",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
"supporting_observations": [
|
||||||
|
{
|
||||||
|
"observation_id": "obs_001",
|
||||||
|
"text": "Population differences can affect survival.",
|
||||||
|
"origin_path": "texts/futuyma/ch1.md",
|
||||||
|
"grounding_status": "grounded",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source_artifacts": [
|
||||||
|
{
|
||||||
|
"artifact_id": "art_001",
|
||||||
|
"artifact_kind": "compiled_page",
|
||||||
|
"title": "Evolutionary Biology Chapter 1",
|
||||||
|
"path": "texts/futuyma/ch1.md",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"review_candidates": [
|
||||||
|
{
|
||||||
|
"candidate_id": "concept::natural-selection",
|
||||||
|
"finding_codes": ["bridge_concept"],
|
||||||
|
"rationale": "Natural Selection | lane=conflict_resolution | priority=12 | graph=bridge_concept",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"suggested_next_actions": ["Inspect supporting observations before export."],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_notebook_page_buckets_graph_navigation() -> None:
    """Relations land in the antecedent / derivative / closer buckets as expected."""
    page = build_notebook_page_from_groundrecall_bundle(_sample_bundle())

    assert page["page_kind"] == "didactopus_notebook_page"
    assert page["concept"]["title"] == "Natural Selection"
    assert page["summary"]["claim_count"] == 2

    nav = page["graph_navigation"]
    assert nav["antecedent_concepts"][0]["title"] == "Variation"
    assert nav["derivative_concepts"][0]["title"] == "Adaptation"
    assert nav["closer_concepts"][0]["title"] == "Common Descent"

    assert page["supporting_sources"][0]["supporting_observation_count"] == 1
    assert page["review_context"]["graph_codes"] == ["bridge_concept"]
    assert page["illustration_opportunities"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_export_notebook_page_writes_json(tmp_path: Path) -> None:
    """Exporting reads the bundle file and writes the page JSON to the output path."""
    bundle_file = tmp_path / "groundrecall_query_bundle.json"
    page_file = tmp_path / "notebook_page.json"
    bundle_file.write_text(json.dumps(_sample_bundle()), encoding="utf-8")

    result = export_notebook_page_from_groundrecall_bundle(bundle_file, page_file)

    assert page_file.exists()
    assert result["page_path"].endswith("notebook_page.json")
    on_disk = json.loads(page_file.read_text(encoding="utf-8"))
    assert on_disk["concept"]["concept_id"] == "concept::natural-selection"
|
||||||
Loading…
Reference in New Issue