diff --git a/src/didactopus/main.py b/src/didactopus/main.py index 2b482b1..3295326 100644 --- a/src/didactopus/main.py +++ b/src/didactopus/main.py @@ -7,6 +7,7 @@ from pathlib import Path from .config import load_config from .doclift_bundle_demo import run_doclift_bundle_demo from .groundrecall_pack_bridge import run_doclift_bundle_with_groundrecall +from .notebook_page import export_notebook_page_from_groundrecall_bundle from .review_loader import load_draft_pack from .review_schema import ReviewSession, ReviewAction from .review_actions import apply_action @@ -48,6 +49,13 @@ def build_parser() -> argparse.ArgumentParser: doclift_gr_parser.add_argument("--course-title", required=True) doclift_gr_parser.add_argument("--author", default="doclift bundle import") doclift_gr_parser.add_argument("--license-name", default="See source bundle metadata") + + notebook_parser = subparsers.add_parser( + "notebook-page", + help="Build a Notebook page payload from a GroundRecall query bundle", + ) + notebook_parser.add_argument("groundrecall_query_bundle") + notebook_parser.add_argument("output_path") return parser @@ -120,4 +128,11 @@ def main() -> None: ) print(summary) return + if args.command == "notebook-page": + summary = export_notebook_page_from_groundrecall_bundle( + args.groundrecall_query_bundle, + args.output_path, + ) + print(summary) + return build_parser().print_help() diff --git a/src/didactopus/notebook_page.py b/src/didactopus/notebook_page.py new file mode 100644 index 0000000..bd6bfde --- /dev/null +++ b/src/didactopus/notebook_page.py @@ -0,0 +1,205 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + + +_ANTECEDENT_TYPES = {"prerequisite", "historical_predecessor"} +_DERIVATIVE_TYPES = {"historical_successor"} + + +def _concept_entry(concept: dict[str, Any], relation_types: set[str] | None = None) -> dict[str, Any]: + entry = { + "concept_id": concept.get("concept_id", ""), + "title": concept.get("title", ""), + "description": concept.get("description", ""), + } + if relation_types: + entry["relation_types"] = sorted(relation_types) + return entry + + +def _bucket_relation( + relation: dict[str, Any], + concept_id: str, + concepts_by_id: dict[str, dict[str, Any]], +) -> tuple[str | None, dict[str, Any] | None]: + source_id = str(relation.get("source_id", "")) + target_id = str(relation.get("target_id", "")) + relation_type = str(relation.get("relation_type", "")).strip() or "related_to" + if concept_id not in {source_id, target_id}: + return None, None + + other_id = target_id if source_id == concept_id else source_id + other = concepts_by_id.get(other_id) + if other is None: + return None, None + + if relation_type in _ANTECEDENT_TYPES: + bucket = "antecedent_concepts" if target_id == concept_id else "derivative_concepts" + elif relation_type in _DERIVATIVE_TYPES: + bucket = "derivative_concepts" if source_id == concept_id else "antecedent_concepts" + else: + bucket = "closer_concepts" + + return bucket, _concept_entry(other, {relation_type}) + + +def _merge_bucket_entries(items: list[dict[str, Any]]) -> list[dict[str, Any]]: + merged: dict[str, dict[str, Any]] = {} + for item in items: + concept_id = str(item.get("concept_id", "")) + if not concept_id: + continue + existing = merged.setdefault( + concept_id, + { + "concept_id": concept_id, + "title": item.get("title", ""), + "description": item.get("description", ""), + "relation_types": [], + }, + ) + existing["relation_types"] = sorted(set(existing["relation_types"]) | set(item.get("relation_types", []))) + return list(merged.values()) + + +def _review_context(bundle: dict[str, Any]) -> dict[str, Any]: + review_candidates = bundle.get("review_candidates", []) or [] + graph_codes = sorted( + { + code + for item in review_candidates + for code in item.get("finding_codes", []) or [] + if "concept" in str(code) or "bridge" in str(code) or "component" in str(code) + } + ) + top_rationales = [str(item.get("rationale", "")).strip() for item in review_candidates if str(item.get("rationale", "")).strip()][:3] + return { + "review_candidate_count": len(review_candidates), + "graph_codes": graph_codes, + "top_rationales": top_rationales, + } + + +def _supporting_sources(bundle: dict[str, Any]) -> list[dict[str, Any]]: + artifacts = bundle.get("source_artifacts", []) or [] + observations = bundle.get("supporting_observations", []) or [] + by_origin: dict[str, int] = {} + for observation in observations: + origin_path = str(observation.get("origin_path", "")).strip() + if origin_path: + by_origin[origin_path] = by_origin.get(origin_path, 0) + 1 + + sources = [] + for artifact in artifacts: + path = str(artifact.get("path", "")).strip() + sources.append( + { + "artifact_id": artifact.get("artifact_id", ""), + "title": artifact.get("title", ""), + "path": path, + "artifact_kind": artifact.get("artifact_kind", ""), + "supporting_observation_count": by_origin.get(path, 0), + } + ) + return sources + + +def _illustration_opportunities(bundle: dict[str, Any], navigation: dict[str, list[dict[str, Any]]]) -> list[dict[str, Any]]: + concept = bundle.get("concept", {}) or {} + concept_title = str(concept.get("title", "")).strip() or str(concept.get("concept_id", "")).strip() + opportunities = [] + if navigation["antecedent_concepts"] or navigation["derivative_concepts"]: + opportunities.append( + { + "kind": "concept_path", + "target_concept_id": concept.get("concept_id", ""), + "purpose": f"Show how {concept_title} fits into a prerequisite or downstream concept path.", + "status": "planned", + } + ) + if navigation["closer_concepts"]: + titles = ", ".join(item["title"] for item in navigation["closer_concepts"][:3] if item.get("title")) + opportunities.append( + { + "kind": "comparison", + "target_concept_id": concept.get("concept_id", ""), + "purpose": f"Compare {concept_title} with nearby concepts: {titles}." if titles else f"Compare {concept_title} with nearby concepts.", + "status": "planned", + } + ) + if bundle.get("supporting_observations"): + opportunities.append( + { + "kind": "evidence_trace", + "target_concept_id": concept.get("concept_id", ""), + "purpose": f"Trace the evidence and claims currently grounding {concept_title}.", + "status": "planned", + } + ) + return opportunities + + +def build_notebook_page_from_groundrecall_bundle(bundle: dict[str, Any]) -> dict[str, Any]: + concept = bundle.get("concept", {}) or {} + concept_id = str(concept.get("concept_id", "")).strip() + concepts_by_id = {concept_id: concept} + for item in bundle.get("related_concepts", []) or []: + item_id = str(item.get("concept_id", "")).strip() + if item_id: + concepts_by_id[item_id] = item + + navigation: dict[str, list[dict[str, Any]]] = { + "antecedent_concepts": [], + "closer_concepts": [], + "derivative_concepts": [], + } + for relation in bundle.get("relations", []) or []: + bucket, entry = _bucket_relation(relation, concept_id, concepts_by_id) + if bucket and entry: + navigation[bucket].append(entry) + + navigation = {key: _merge_bucket_entries(items) for key, items in navigation.items()} + supporting_observations = bundle.get("supporting_observations", []) or [] + supporting_excerpts = [ + { + "observation_id": item.get("observation_id", ""), + "text": item.get("text", ""), + "origin_path": item.get("origin_path", ""), + "grounding_status": item.get("grounding_status", ""), + } + for item in supporting_observations[:5] + ] + + return { + "page_kind": "didactopus_notebook_page", + "concept": { + "concept_id": concept.get("concept_id", ""), + "title": concept.get("title", ""), + "description": concept.get("description", ""), + "aliases": concept.get("aliases", []) or [], + }, + "summary": { + "claim_count": len(bundle.get("relevant_claims", []) or []), + "supporting_observation_count": len(supporting_observations), + "related_concept_count": len(bundle.get("related_concepts", []) or []), + }, + "graph_navigation": navigation, + "supporting_sources": _supporting_sources(bundle), + "supporting_excerpts": supporting_excerpts, + "review_context": _review_context(bundle), + "illustration_opportunities": _illustration_opportunities(bundle, navigation), + "suggested_next_actions": bundle.get("suggested_next_actions", []) or [], + } + + +def export_notebook_page_from_groundrecall_bundle(bundle_path: str | Path, out_path: str | Path) -> dict[str, Any]: + bundle_file = Path(bundle_path) + payload = json.loads(bundle_file.read_text(encoding="utf-8")) + page = build_notebook_page_from_groundrecall_bundle(payload) + target = Path(out_path) + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(json.dumps(page, indent=2), encoding="utf-8") + return {"page_path": str(target), "page": page} diff --git a/tests/test_main_cli.py b/tests/test_main_cli.py index e631229..8c9ed3f 100755 --- a/tests/test_main_cli.py +++ b/tests/test_main_cli.py @@ -96,3 +96,31 @@ def test_main_legacy_review_mode_uses_review_parser(monkeypatch, tmp_path: Path) assert called["draft_pack"] == str(tmp_path / "draft") assert called["output_dir"] == str(tmp_path / "out") + + +def test_main_notebook_page_subcommand(monkeypatch, capsys, tmp_path: Path) -> None: + captured: dict = {} + + def _fake_export_notebook_page_from_groundrecall_bundle(bundle_path, out_path): + captured["bundle_path"] = str(bundle_path) + captured["out_path"] = str(out_path) + return {"page_path": str(out_path)} + + monkeypatch.setattr(main_module, "export_notebook_page_from_groundrecall_bundle", _fake_export_notebook_page_from_groundrecall_bundle) + monkeypatch.setattr( + main_module.sys, + "argv", + [ + "didactopus", + "notebook-page", + str(tmp_path / "groundrecall_query_bundle.json"), + str(tmp_path / "notebook_page.json"), + ], + ) + + main_module.main() + out = capsys.readouterr().out + + assert captured["bundle_path"].endswith("groundrecall_query_bundle.json") + assert captured["out_path"].endswith("notebook_page.json") + assert "page_path" in out diff --git a/tests/test_notebook_page.py b/tests/test_notebook_page.py new file mode 100644 index 0000000..d00289a --- /dev/null +++ b/tests/test_notebook_page.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +import json +from pathlib import Path + +from didactopus.notebook_page import ( + build_notebook_page_from_groundrecall_bundle, + export_notebook_page_from_groundrecall_bundle, +) + + +def _sample_bundle() -> dict: + return { + "bundle_kind": "groundrecall_query_bundle", + "concept": { + "concept_id": "concept::natural-selection", + "title": "Natural Selection", + "description": "Differential survival and reproduction.", + "aliases": ["selection"], + }, + "relevant_claims": [ + {"claim_id": "clm_001", "claim_text": "Selection can change trait frequencies."}, + {"claim_id": "clm_002", "claim_text": "Selection depends on heritable variation."}, + ], + "relations": [ + { + "relation_id": "rel_001", + "source_id": "concept::variation", + "target_id": "concept::natural-selection", + "relation_type": "prerequisite", + }, + { + "relation_id": "rel_002", + "source_id": "concept::natural-selection", + "target_id": "concept::adaptation", + "relation_type": "historical_successor", + }, + { + "relation_id": "rel_003", + "source_id": "concept::natural-selection", + "target_id": "concept::common-descent", + "relation_type": "supports", + }, + ], + "related_concepts": [ + { + "concept_id": "concept::variation", + "title": "Variation", + "description": "Differences among individuals.", + }, + { + "concept_id": "concept::adaptation", + "title": "Adaptation", + "description": "Traits fit to local conditions.", + }, + { + "concept_id": "concept::common-descent", + "title": "Common Descent", + "description": "Shared ancestry of organisms.", + }, + ], + "supporting_observations": [ + { + "observation_id": "obs_001", + "text": "Population differences can affect survival.", + "origin_path": "texts/futuyma/ch1.md", + "grounding_status": "grounded", + } + ], + "source_artifacts": [ + { + "artifact_id": "art_001", + "artifact_kind": "compiled_page", + "title": "Evolutionary Biology Chapter 1", + "path": "texts/futuyma/ch1.md", + } + ], + "review_candidates": [ + { + "candidate_id": "concept::natural-selection", + "finding_codes": ["bridge_concept"], + "rationale": "Natural Selection | lane=conflict_resolution | priority=12 | graph=bridge_concept", + } + ], + "suggested_next_actions": ["Inspect supporting observations before export."], + } + + +def test_build_notebook_page_buckets_graph_navigation() -> None: + page = build_notebook_page_from_groundrecall_bundle(_sample_bundle()) + + assert page["page_kind"] == "didactopus_notebook_page" + assert page["concept"]["title"] == "Natural Selection" + assert page["summary"]["claim_count"] == 2 + assert page["graph_navigation"]["antecedent_concepts"][0]["title"] == "Variation" + assert page["graph_navigation"]["derivative_concepts"][0]["title"] == "Adaptation" + assert page["graph_navigation"]["closer_concepts"][0]["title"] == "Common Descent" + assert page["supporting_sources"][0]["supporting_observation_count"] == 1 + assert page["review_context"]["graph_codes"] == ["bridge_concept"] + assert page["illustration_opportunities"] + + +def test_export_notebook_page_writes_json(tmp_path: Path) -> None: + bundle_path = tmp_path / "groundrecall_query_bundle.json" + out_path = tmp_path / "notebook_page.json" + bundle_path.write_text(json.dumps(_sample_bundle()), encoding="utf-8") + + payload = export_notebook_page_from_groundrecall_bundle(bundle_path, out_path) + + assert out_path.exists() + assert payload["page_path"].endswith("notebook_page.json") + written = json.loads(out_path.read_text(encoding="utf-8")) + assert written["concept"]["concept_id"] == "concept::natural-selection"