Made knowledge graphs first-class parts of domain packs.
This commit is contained in:
parent
51dd2b29ff
commit
41ca57d60f
|
|
@ -51,11 +51,17 @@ The pack emitter writes:
|
|||
- `conflict_report.md`
|
||||
- `license_attribution.json`
|
||||
- `source_corpus.json`
|
||||
- `knowledge_graph.json`
|
||||
|
||||
`source_corpus.json` is the main grounded-text artifact. It preserves lesson bodies, objectives,
|
||||
exercises, and source references from the ingested material so downstream tutoring or evaluation
|
||||
can rely on source-derived text instead of only the distilled concept graph.
|
||||
|
||||
`knowledge_graph.json` is the graph-first artifact. It preserves typed nodes and justified edges
|
||||
for sources, modules, lessons, concepts, assessment signals, and prerequisite/support relations.
|
||||
Later Didactopus retrieval and tutoring flows can use this graph to explain why a concept appears,
|
||||
what supports it, and which source material grounds it.
|
||||
|
||||
## Rule layer
|
||||
|
||||
The current default rules:
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -14,3 +14,4 @@ profile_templates: {}
|
|||
cross_pack_links: []
|
||||
supporting_artifacts:
|
||||
- source_corpus.json
|
||||
- knowledge_graph.json
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -14,3 +14,4 @@ profile_templates: {}
|
|||
cross_pack_links: []
|
||||
supporting_artifacts:
|
||||
- source_corpus.json
|
||||
- knowledge_graph.json
|
||||
|
|
|
|||
|
|
@ -11,6 +11,12 @@
|
|||
],
|
||||
"concept_count": 34,
|
||||
"source_fragment_count": 60,
|
||||
"knowledge_graph_summary": {
|
||||
"node_count": 98,
|
||||
"edge_count": 178,
|
||||
"concept_count": 34,
|
||||
"source_count": 3
|
||||
},
|
||||
"target_concept": "mit-ocw-information-and-entropy::thermodynamics-and-entropy",
|
||||
"curriculum_path": [
|
||||
"mit-ocw-information-and-entropy::mit-ocw-6-050j-information-and-entropy-course-home",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,91 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class GraphBundle:
    """In-memory pair of emitted pack artifacts used by the retrieval helpers.

    Both fields hold already-parsed JSON payloads, not file paths.
    """

    # Parsed knowledge_graph.json payload; helpers read its "nodes"/"edges" lists.
    knowledge_graph: dict
    # Parsed source_corpus.json payload; helpers read its "fragments" list.
    source_corpus: dict
|
||||
|
||||
|
||||
def concept_node_id(concept_id: str) -> str:
    """Return the graph node id for a concept ("concept::<concept_id>")."""
    return "concept::" + concept_id
|
||||
|
||||
|
||||
def _node_index(bundle: GraphBundle) -> dict[str, dict]:
|
||||
return {node["id"]: node for node in bundle.knowledge_graph.get("nodes", [])}
|
||||
|
||||
|
||||
def _edges(bundle: GraphBundle) -> list[dict]:
|
||||
return list(bundle.knowledge_graph.get("edges", []))
|
||||
|
||||
|
||||
def get_concept_node(bundle: GraphBundle, concept_id: str) -> dict | None:
    """Return the node for *concept_id*, or None when it is not in the graph."""
    index = {node["id"]: node for node in bundle.knowledge_graph.get("nodes", [])}
    return index.get("concept::" + concept_id)
|
||||
|
||||
|
||||
def concept_neighborhood(bundle: GraphBundle, concept_id: str) -> dict:
    """Collect the concept's node plus its incoming/outgoing edges and endpoints.

    Returns a dict with the concept node (or {} when absent), the raw edge
    lists, and the endpoint nodes that are actually present in the graph
    (edges pointing at missing nodes keep their edge entry but contribute no
    node entry).
    """
    target = "concept::" + concept_id
    index = {node["id"]: node for node in bundle.knowledge_graph.get("nodes", [])}
    all_edges = list(bundle.knowledge_graph.get("edges", []))
    incoming = [edge for edge in all_edges if edge["target"] == target]
    outgoing = [edge for edge in all_edges if edge["source"] == target]
    return {
        "concept": index.get(target, {}),
        "incoming": incoming,
        "outgoing": outgoing,
        "incoming_nodes": [index[edge["source"]] for edge in incoming if edge["source"] in index],
        "outgoing_nodes": [index[edge["target"]] for edge in outgoing if edge["target"] in index],
    }
|
||||
|
||||
|
||||
def source_fragments_for_concept(bundle: GraphBundle, concept_id: str, limit: int = 3) -> list[dict]:
    """Return up to *limit* corpus fragments from lessons linked to the concept.

    Lesson titles are gathered from both edge directions of the concept's
    neighborhood; fragments are then taken from the source corpus in order
    until the limit is reached.
    """
    neighborhood = concept_neighborhood(bundle, concept_id)
    lesson_titles = {
        node.get("title", "")
        for node in neighborhood["incoming_nodes"] + neighborhood["outgoing_nodes"]
        if node.get("type") == "lesson"
    }
    selected: list[dict] = []
    for fragment in bundle.source_corpus.get("fragments", []):
        if fragment.get("lesson_title") not in lesson_titles:
            continue
        selected.append(fragment)
        if len(selected) >= limit:
            return selected
    return selected
|
||||
|
||||
|
||||
def prerequisite_titles(bundle: GraphBundle, concept_id: str) -> list[str]:
    """Return deduplicated titles of prerequisite concepts, in edge order.

    Fix: the previous implementation zipped ``incoming`` (every incoming
    edge) against ``incoming_nodes`` (only edges whose source node exists in
    the graph), so a single missing source node shifted every later
    edge/node pairing. Each edge is now paired with its own source node.
    """
    neighborhood = concept_neighborhood(bundle, concept_id)
    nodes_by_id = {node["id"]: node for node in neighborhood["incoming_nodes"]}
    titles: list[str] = []
    seen: set[str] = set()
    for edge in neighborhood["incoming"]:
        if edge.get("type") != "prerequisite":
            continue
        node = nodes_by_id.get(edge["source"])
        if node is None:
            # Edge references a node that is not present in the graph.
            continue
        title = node.get("title", node.get("id", ""))
        if title not in seen:
            seen.add(title)
            titles.append(title)
    return titles
|
||||
|
||||
|
||||
def lesson_titles_for_concept(bundle: GraphBundle, concept_id: str) -> list[str]:
    """Return deduplicated titles of lessons that teach or support the concept.

    Fix: the previous implementation zipped ``incoming`` (every incoming
    edge) against ``incoming_nodes`` (only edges whose source node exists in
    the graph), so a single missing source node shifted every later
    edge/node pairing. Each edge is now paired with its own source node.
    """
    neighborhood = concept_neighborhood(bundle, concept_id)
    nodes_by_id = {node["id"]: node for node in neighborhood["incoming_nodes"]}
    titles: list[str] = []
    seen: set[str] = set()
    for edge in neighborhood["incoming"]:
        if edge.get("type") not in {"supports_concept", "teaches_concept"}:
            continue
        node = nodes_by_id.get(edge["source"])
        if node is None or node.get("type") != "lesson":
            continue
        title = node.get("title", node.get("id", ""))
        if title not in seen:
            seen.add(title)
            titles.append(title)
    return titles
|
||||
|
|
@ -0,0 +1,219 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from .course_schema import ConceptCandidate, NormalizedCourse
|
||||
|
||||
|
||||
def _slugify(text: str) -> str:
|
||||
cleaned = re.sub(r"[^a-zA-Z0-9]+", "-", text.strip().lower()).strip("-")
|
||||
return cleaned or "untitled"
|
||||
|
||||
|
||||
def _source_node_id(source_path: str) -> str:
    """Stable graph node id for a source document path."""
    return "source::" + _slugify(source_path)
|
||||
|
||||
|
||||
def _module_node_id(module_title: str) -> str:
    """Stable graph node id for a module title."""
    return "module::" + _slugify(module_title)
|
||||
|
||||
|
||||
def _lesson_node_id(module_title: str, lesson_title: str) -> str:
    """Stable graph node id for a lesson, scoped by its module title."""
    return "lesson::" + _slugify(module_title) + "::" + _slugify(lesson_title)
|
||||
|
||||
|
||||
def _concept_node_id(concept_id: str) -> str:
|
||||
return f"concept::{concept_id}"
|
||||
|
||||
|
||||
def _signal_node_id(kind: str, lesson_title: str, idx: int) -> str:
    """Stable graph node id for an assessment signal (objective/exercise)."""
    return kind + "::" + _slugify(lesson_title) + "::" + str(idx)
|
||||
|
||||
|
||||
def _add_node(nodes: dict[str, dict], node_id: str, node_type: str, **attrs) -> None:
|
||||
node = nodes.setdefault(node_id, {"id": node_id, "type": node_type})
|
||||
for key, value in attrs.items():
|
||||
if value not in (None, "", [], {}):
|
||||
node[key] = value
|
||||
|
||||
|
||||
def _add_edge(edges: list[dict], source: str, target: str, edge_type: str, justification: str, provenance: list[str] | None = None, confidence: float = 1.0) -> None:
|
||||
edges.append(
|
||||
{
|
||||
"source": source,
|
||||
"target": target,
|
||||
"type": edge_type,
|
||||
"justification": justification,
|
||||
"provenance": list(provenance or []),
|
||||
"confidence": confidence,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def build_knowledge_graph(course: NormalizedCourse, concepts: list[ConceptCandidate]) -> dict:
    """Build the graph-first pack artifact from a normalized course.

    Emits typed nodes (source, concept, module, lesson, assessment_signal)
    and justified edges (contains_lesson, derived_lesson, has_objective,
    has_exercise, teaches_concept, mentions_concept, prerequisite,
    supports_concept). Every edge carries a human-readable justification,
    provenance refs, and a confidence score.

    Args:
        course: Normalized course whose source records, modules, and lessons
            become graph nodes.
        concepts: Distilled concept candidates that become concept nodes.

    Returns:
        JSON-serializable dict with ``course_title``, ``rights_note``,
        ``summary`` counts, ``nodes``, and ``edges``.

    Cleanup vs. previous version: removed two locals that were computed but
    never read (``concept_ids``, ``lesson_sources``) and replaced the
    per-concept rescans of all modules with a single title index.
    """
    nodes: dict[str, dict] = {}
    edges: list[dict] = []

    # Source nodes first so "derived_lesson" membership checks below can hit.
    for source in course.source_records:
        source_id = _source_node_id(source.source_path)
        _add_node(
            nodes,
            source_id,
            "source",
            title=source.title or source.source_path,
            source_path=source.source_path,
            source_type=source.source_type,
            # Older source records may lack a metadata attribute.
            metadata=getattr(source, "metadata", {}),
        )

    # Concept nodes next so lesson-level teaches/mentions edges can test
    # membership in `nodes`.
    for concept in concepts:
        concept_node = _concept_node_id(concept.id)
        _add_node(
            nodes,
            concept_node,
            "concept",
            title=concept.title,
            description=concept.description,
            source_modules=list(concept.source_modules),
            source_lessons=list(concept.source_lessons),
            source_courses=list(concept.source_courses),
            mastery_signals=list(concept.mastery_signals),
        )

    for module in course.modules:
        module_id = _module_node_id(module.title)
        _add_node(nodes, module_id, "module", title=module.title)
        for lesson in module.lessons:
            lesson_id = _lesson_node_id(module.title, lesson.title)
            _add_node(
                nodes,
                lesson_id,
                "lesson",
                title=lesson.title,
                module_title=module.title,
                body=lesson.body,
                source_refs=list(lesson.source_refs),
                key_terms=list(lesson.key_terms),
            )
            _add_edge(
                edges,
                module_id,
                lesson_id,
                "contains_lesson",
                justification=f"Lesson '{lesson.title}' appears under module '{module.title}'.",
                provenance=list(lesson.source_refs),
            )
            # Link each lesson back to every known source it was derived from.
            for source_ref in lesson.source_refs:
                source_id = _source_node_id(source_ref)
                if source_id in nodes:
                    _add_edge(
                        edges,
                        source_id,
                        lesson_id,
                        "derived_lesson",
                        justification=f"Lesson '{lesson.title}' was ingested from source '{source_ref}'.",
                        provenance=[source_ref],
                    )

            for idx, objective in enumerate(lesson.objectives, start=1):
                objective_id = _signal_node_id("objective", lesson.title, idx)
                _add_node(nodes, objective_id, "assessment_signal", title=objective, signal_kind="objective")
                _add_edge(
                    edges,
                    lesson_id,
                    objective_id,
                    "has_objective",
                    justification=f"Objective {idx} was extracted from lesson '{lesson.title}'.",
                    provenance=list(lesson.source_refs),
                )

            for idx, exercise in enumerate(lesson.exercises, start=1):
                exercise_id = _signal_node_id("exercise", lesson.title, idx)
                _add_node(nodes, exercise_id, "assessment_signal", title=exercise, signal_kind="exercise")
                _add_edge(
                    edges,
                    lesson_id,
                    exercise_id,
                    "has_exercise",
                    justification=f"Exercise {idx} was extracted from lesson '{lesson.title}'.",
                    provenance=list(lesson.source_refs),
                )

            # A lesson whose slug matches a concept id is that concept's
            # primary teaching material.
            lesson_concept_id = _concept_node_id(_slugify(lesson.title))
            if lesson_concept_id in nodes:
                _add_edge(
                    edges,
                    lesson_id,
                    lesson_concept_id,
                    "teaches_concept",
                    justification=f"Lesson '{lesson.title}' yields the lesson-level concept '{lesson.title}'.",
                    provenance=list(lesson.source_refs),
                )

            for term in lesson.key_terms:
                term_id = _concept_node_id(_slugify(term))
                if term_id in nodes:
                    _add_edge(
                        edges,
                        lesson_id,
                        term_id,
                        "mentions_concept",
                        justification=f"Key term '{term}' was extracted from lesson '{lesson.title}'.",
                        provenance=list(lesson.source_refs),
                        confidence=0.7,
                    )

    # Index lessons by title once instead of rescanning every module for each
    # (concept, source_lesson) pair.
    lessons_by_title: dict[str, list] = {}
    for module in course.modules:
        for lesson in module.lessons:
            lessons_by_title.setdefault(lesson.title, []).append((module.title, lesson))

    for concept in concepts:
        concept_node = _concept_node_id(concept.id)
        for prereq in concept.prerequisites:
            prereq_id = _concept_node_id(prereq)
            if prereq_id in nodes:
                _add_edge(
                    edges,
                    prereq_id,
                    concept_node,
                    "prerequisite",
                    justification=f"Concept '{concept.title}' depends on prerequisite '{prereq}'.",
                    provenance=list(concept.source_courses),
                    confidence=0.85,
                )
        for lesson_title in concept.source_lessons:
            for module_title, lesson in lessons_by_title.get(lesson_title, []):
                lesson_id = _lesson_node_id(module_title, lesson.title)
                if concept_node in nodes and lesson_id in nodes:
                    _add_edge(
                        edges,
                        lesson_id,
                        concept_node,
                        "supports_concept",
                        justification=f"Concept '{concept.title}' was extracted from lesson '{lesson.title}'.",
                        provenance=list(lesson.source_refs),
                        # Highest confidence when the concept is the lesson's own slug.
                        confidence=0.9 if concept.id == _slugify(lesson.title) else 0.7,
                    )

    return {
        "course_title": course.title,
        "rights_note": course.rights_note,
        "summary": {
            "node_count": len(nodes),
            "edge_count": len(edges),
            "concept_count": len(concepts),
            "source_count": len(course.source_records),
        },
        "nodes": list(nodes.values()),
        "edges": edges,
    }
|
||||
|
||||
|
||||
def write_knowledge_graph(course: NormalizedCourse, concepts: list[ConceptCandidate], outdir: str | Path) -> None:
    """Build the knowledge graph and write it to <outdir>/knowledge_graph.json.

    Creates *outdir* (and parents) if needed; output is UTF-8, indented JSON.
    """
    target_dir = Path(outdir)
    target_dir.mkdir(parents=True, exist_ok=True)
    graph = build_knowledge_graph(course, concepts)
    destination = target_dir / "knowledge_graph.json"
    destination.write_text(json.dumps(graph, indent=2), encoding="utf-8")
|
||||
|
|
@ -15,6 +15,7 @@ from .mastery_ledger import (
|
|||
export_capability_profile_json,
|
||||
export_capability_report_markdown,
|
||||
)
|
||||
from .knowledge_graph import write_knowledge_graph
|
||||
from .pack_emitter import build_draft_pack, write_draft_pack, write_source_corpus
|
||||
from .rule_policy import RuleContext, build_default_rules, run_rules
|
||||
from .topic_ingest import build_topic_bundle, document_to_course, extract_concept_candidates, merge_courses_into_topic_course
|
||||
|
|
@ -167,6 +168,7 @@ def run_ocw_information_entropy_demo(
|
|||
)
|
||||
write_draft_pack(draft, pack_dir)
|
||||
write_source_corpus(merged, pack_dir)
|
||||
write_knowledge_graph(merged, ctx.concepts, pack_dir)
|
||||
if source_inventory.exists():
|
||||
inventory = load_sources(source_inventory)
|
||||
compliance_manifest = build_pack_compliance_manifest(draft.pack["name"], draft.pack["display_name"], inventory)
|
||||
|
|
@ -210,6 +212,7 @@ def run_ocw_information_entropy_demo(
|
|||
"review_flags": list(ctx.review_flags),
|
||||
"concept_count": len(ctx.concepts),
|
||||
"source_fragment_count": len(json.loads((pack_dir / "source_corpus.json").read_text(encoding="utf-8")).get("fragments", [])),
|
||||
"knowledge_graph_summary": json.loads((pack_dir / "knowledge_graph.json").read_text(encoding="utf-8")).get("summary", {}),
|
||||
"target_concept": target_key,
|
||||
"curriculum_path": concept_path,
|
||||
"mastered_concepts": sorted(state.mastered_concepts),
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ from pathlib import Path
|
|||
import sys
|
||||
|
||||
from .config import load_config
|
||||
from .graph_retrieval import lesson_titles_for_concept, prerequisite_titles, source_fragments_for_concept
|
||||
from .model_provider import ModelProvider
|
||||
from .ocw_skill_agent_demo import load_ocw_skill_context
|
||||
from .role_prompts import evaluator_system_prompt, learner_system_prompt, mentor_system_prompt, practice_system_prompt
|
||||
|
|
@ -153,6 +154,34 @@ def _path_titles(context, limit: int | None = None) -> list[str]:
|
|||
return titles[:limit] if limit is not None else titles
|
||||
|
||||
|
||||
def _concept_by_title(context, title: str) -> dict | None:
|
||||
lowered = title.strip().lower()
|
||||
for concept in context.concepts:
|
||||
if str(concept.get("title", "")).strip().lower() == lowered:
|
||||
return concept
|
||||
return None
|
||||
|
||||
|
||||
def _grounding_text_for_title(context, title: str) -> str:
|
||||
concept = _concept_by_title(context, title)
|
||||
if concept is None:
|
||||
return ""
|
||||
concept_id = concept.get("id", "")
|
||||
prereqs = prerequisite_titles(context.graph_bundle, concept_id)
|
||||
lessons = lesson_titles_for_concept(context.graph_bundle, concept_id)
|
||||
fragments = source_fragments_for_concept(context.graph_bundle, concept_id, limit=2)
|
||||
fragment_lines = [fragment.get("text", "") for fragment in fragments if fragment.get("text")]
|
||||
lines = [
|
||||
f"Concept focus: {title}",
|
||||
f"Prerequisites: {', '.join(prereqs) if prereqs else 'none explicit'}",
|
||||
f"Supporting lessons: {', '.join(lessons) if lessons else title}",
|
||||
]
|
||||
if fragment_lines:
|
||||
lines.append("Grounding fragments:")
|
||||
lines.extend(f"- {line}" for line in fragment_lines)
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _healthy_rolemesh_models(provider: ModelProvider) -> set[str]:
|
||||
config = provider.config
|
||||
if config.provider.lower() != "rolemesh":
|
||||
|
|
@ -240,6 +269,8 @@ def build_ocw_rolemesh_transcript(config_path: str | Path, skill_dir: str | Path
|
|||
|
||||
mentor_guidance = _generate_checked(
|
||||
provider,
|
||||
f"{_grounding_text_for_title(context, path_titles[1])}\n\n"
|
||||
f"{_grounding_text_for_title(context, path_titles[2])}\n\n"
|
||||
"Given the learner reflection, explain the first two concepts to study from the generated path and why. "
|
||||
f"Path reference: {path_titles[:4]}",
|
||||
role="mentor",
|
||||
|
|
@ -254,6 +285,8 @@ def build_ocw_rolemesh_transcript(config_path: str | Path, skill_dir: str | Path
|
|||
|
||||
practice_task = _generate_checked(
|
||||
provider,
|
||||
f"{_grounding_text_for_title(context, path_titles[1])}\n\n"
|
||||
f"{_grounding_text_for_title(context, path_titles[2])}\n\n"
|
||||
"Generate one short practice task that forces the learner to connect counting/probability with Shannon entropy, "
|
||||
"without giving away the full answer.",
|
||||
role="practice",
|
||||
|
|
@ -282,6 +315,7 @@ def build_ocw_rolemesh_transcript(config_path: str | Path, skill_dir: str | Path
|
|||
|
||||
evaluator_feedback = _generate_checked(
|
||||
provider,
|
||||
f"{_grounding_text_for_title(context, path_titles[2])}\n\n"
|
||||
"Evaluate this learner attempt for correctness, explanation quality, and limitations. "
|
||||
f"Task: {practice_task}\nAttempt: {learner_attempt}",
|
||||
role="evaluator",
|
||||
|
|
@ -296,6 +330,7 @@ def build_ocw_rolemesh_transcript(config_path: str | Path, skill_dir: str | Path
|
|||
|
||||
mentor_next_step = _generate_checked(
|
||||
provider,
|
||||
f"{_grounding_text_for_title(context, 'Channel Capacity')}\n\n"
|
||||
"Given the evaluator feedback, tell the learner what to do next before moving on to channel capacity. "
|
||||
"Use the course path to show what comes next.",
|
||||
role="mentor",
|
||||
|
|
@ -313,18 +348,21 @@ def build_ocw_rolemesh_transcript(config_path: str | Path, skill_dir: str | Path
|
|||
"topic": "Channel Capacity",
|
||||
"path_slice": path_titles[4:7] or path_titles,
|
||||
"practice_anchor": "binary symmetric channel",
|
||||
"grounding_title": "Channel Capacity",
|
||||
"required_terms": ["channel", "capacity", "entropy", "noise"],
|
||||
},
|
||||
{
|
||||
"topic": "Coding and Compression",
|
||||
"path_slice": path_titles[5:9] or path_titles,
|
||||
"practice_anchor": "compression and error-correcting code",
|
||||
"grounding_title": "Source Coding and Compression",
|
||||
"required_terms": ["coding", "compression", "redundancy", "error"],
|
||||
},
|
||||
{
|
||||
"topic": "Thermodynamic Entropy and Synthesis",
|
||||
"path_slice": path_titles[8:] or path_titles,
|
||||
"practice_anchor": "thermodynamic entropy",
|
||||
"grounding_title": "Thermodynamics and Entropy",
|
||||
"required_terms": ["thermodynamic", "entropy", "information", "physical"],
|
||||
},
|
||||
]
|
||||
|
|
@ -332,6 +370,7 @@ def build_ocw_rolemesh_transcript(config_path: str | Path, skill_dir: str | Path
|
|||
for stage in stage_specs:
|
||||
mentor_stage = _generate_checked(
|
||||
provider,
|
||||
f"{_grounding_text_for_title(context, stage['grounding_title'])}\n\n"
|
||||
f"The learner is continuing through the MIT OCW Information and Entropy course. "
|
||||
f"Bridge from the previous work into {stage['topic']}. "
|
||||
f"Reference this path segment: {stage['path_slice']}. "
|
||||
|
|
@ -362,6 +401,7 @@ def build_ocw_rolemesh_transcript(config_path: str | Path, skill_dir: str | Path
|
|||
|
||||
practice_stage = _generate_checked(
|
||||
provider,
|
||||
f"{_grounding_text_for_title(context, stage['grounding_title'])}\n\n"
|
||||
f"Create one short reasoning task about {stage['practice_anchor']} for the learner. "
|
||||
"Keep it course-relevant and do not provide the full solution.",
|
||||
role="practice",
|
||||
|
|
@ -376,6 +416,7 @@ def build_ocw_rolemesh_transcript(config_path: str | Path, skill_dir: str | Path
|
|||
|
||||
evaluator_stage = _generate_checked(
|
||||
provider,
|
||||
f"{_grounding_text_for_title(context, stage['grounding_title'])}\n\n"
|
||||
f"Give short evaluator feedback on this learner reflection in the context of {stage['topic']}: "
|
||||
f"{learner_stage}\nTask context: {practice_stage}",
|
||||
role="evaluator",
|
||||
|
|
@ -393,6 +434,7 @@ def build_ocw_rolemesh_transcript(config_path: str | Path, skill_dir: str | Path
|
|||
"skill": context.skill_name,
|
||||
"course": context.pack.get("display_name", context.pack.get("name", "")),
|
||||
"curriculum_path_titles": path_titles,
|
||||
"graph_grounding_summary": context.run_summary.get("knowledge_graph_summary", {}),
|
||||
"role_fallbacks": role_fallbacks,
|
||||
"status_updates": status_updates,
|
||||
"transcript": turns,
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ from pathlib import Path
|
|||
import yaml
|
||||
|
||||
from .evaluator_pipeline import CritiqueEvaluator, LearnerAttempt, RubricEvaluator, SymbolicRuleEvaluator, aggregate, run_pipeline
|
||||
from .graph_retrieval import GraphBundle, lesson_titles_for_concept, prerequisite_titles, source_fragments_for_concept
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -17,6 +18,7 @@ class SkillContext:
|
|||
capability_summary: str
|
||||
pack: dict
|
||||
concepts: list[dict]
|
||||
graph_bundle: GraphBundle
|
||||
capability_profile: dict
|
||||
run_summary: dict
|
||||
|
||||
|
|
@ -46,6 +48,10 @@ def load_ocw_skill_context(skill_dir: str | Path) -> SkillContext:
|
|||
capability_summary=(skill_dir / "references" / "generated-capability-summary.md").read_text(encoding="utf-8"),
|
||||
pack=yaml.safe_load((pack_dir / "pack.yaml").read_text(encoding="utf-8")) or {},
|
||||
concepts=(yaml.safe_load((pack_dir / "concepts.yaml").read_text(encoding="utf-8")) or {}).get("concepts", []),
|
||||
graph_bundle=GraphBundle(
|
||||
knowledge_graph=json.loads((pack_dir / "knowledge_graph.json").read_text(encoding="utf-8")),
|
||||
source_corpus=json.loads((pack_dir / "source_corpus.json").read_text(encoding="utf-8")),
|
||||
),
|
||||
capability_profile=json.loads((run_dir / "capability_profile.json").read_text(encoding="utf-8")),
|
||||
run_summary=json.loads((run_dir / "run_summary.json").read_text(encoding="utf-8")),
|
||||
)
|
||||
|
|
@ -88,8 +94,16 @@ def build_skill_grounded_study_plan(context: SkillContext, target_task: str) ->
|
|||
"concept_key": concept_key,
|
||||
"title": concept["title"],
|
||||
"status": "mastered" if concept_key in context.capability_profile.get("mastered_concepts", []) else "review-needed",
|
||||
"prerequisites": [
|
||||
_concept_key(pack_name, prereq) for prereq in concept.get("prerequisites", [])
|
||||
"prerequisites": [_concept_key(pack_name, prereq) for prereq in concept.get("prerequisites", [])],
|
||||
"prerequisite_titles": prerequisite_titles(context.graph_bundle, concept_id),
|
||||
"supporting_lessons": lesson_titles_for_concept(context.graph_bundle, concept_id),
|
||||
"source_fragments": [
|
||||
{
|
||||
"lesson_title": fragment.get("lesson_title", ""),
|
||||
"kind": fragment.get("kind", ""),
|
||||
"text": fragment.get("text", ""),
|
||||
}
|
||||
for fragment in source_fragments_for_concept(context.graph_bundle, concept_id, limit=2)
|
||||
],
|
||||
"recommended_action": (
|
||||
f"Use {concept['title']} as the primary teaching anchor."
|
||||
|
|
@ -115,10 +129,14 @@ def build_skill_grounded_explanation(context: SkillContext, concept_id: str) ->
|
|||
|
||||
concept_key = _concept_key(pack_name, concept_id)
|
||||
summary = context.capability_profile.get("evaluator_summary_by_concept", {}).get(concept_key, {})
|
||||
prereqs = prerequisite_titles(context.graph_bundle, concept_id)
|
||||
lessons = lesson_titles_for_concept(context.graph_bundle, concept_id)
|
||||
fragments = source_fragments_for_concept(context.graph_bundle, concept_id, limit=2)
|
||||
explanation = (
|
||||
f"{concept['title']} is represented in the Information and Entropy skill as part of a progression from "
|
||||
f"foundational probability ideas toward communication limits and physical interpretation. "
|
||||
f"It depends on {', '.join(concept.get('prerequisites', []) or ['no explicit prerequisites in the generated pack'])}. "
|
||||
f"It depends on {', '.join(prereqs or concept.get('prerequisites', []) or ['no explicit prerequisites in the generated pack'])}. "
|
||||
f"It is grounded by lessons such as {', '.join(lessons or [concept['title']])}. "
|
||||
f"The current demo learner already mastered this concept, with evaluator means {summary}, so the skill can use it as a stable explanation anchor."
|
||||
)
|
||||
return {
|
||||
|
|
@ -126,6 +144,17 @@ def build_skill_grounded_explanation(context: SkillContext, concept_id: str) ->
|
|||
"title": concept["title"],
|
||||
"explanation": explanation,
|
||||
"source_description": concept.get("description", ""),
|
||||
"grounding": {
|
||||
"supporting_lessons": lessons,
|
||||
"source_fragments": [
|
||||
{
|
||||
"lesson_title": fragment.get("lesson_title", ""),
|
||||
"kind": fragment.get("kind", ""),
|
||||
"text": fragment.get("text", ""),
|
||||
}
|
||||
for fragment in fragments
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -154,6 +183,7 @@ def evaluate_submission_with_skill(context: SkillContext, concept_id: str, submi
|
|||
"skill_reference": {
|
||||
"skill_name": context.skill_name,
|
||||
"mastered_by_demo_agent": mastered_reference,
|
||||
"supporting_lessons": lesson_titles_for_concept(context.graph_bundle, concept_id),
|
||||
},
|
||||
"follow_up": (
|
||||
"Extend the answer with an explicit limitation or assumption."
|
||||
|
|
@ -205,6 +235,7 @@ def run_ocw_skill_agent_demo(skill_dir: str | Path, out_dir: str | Path) -> dict
|
|||
"",
|
||||
"## Explanation Demo",
|
||||
explanation["explanation"],
|
||||
f"- Supporting lessons: {explanation['grounding']['supporting_lessons']}",
|
||||
"",
|
||||
"## Evaluation Demo",
|
||||
f"- Verdict: {evaluation['verdict']}",
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ def build_draft_pack(
|
|||
"overrides": [],
|
||||
"profile_templates": {},
|
||||
"cross_pack_links": [],
|
||||
"supporting_artifacts": ["source_corpus.json"],
|
||||
"supporting_artifacts": ["source_corpus.json", "knowledge_graph.json"],
|
||||
}
|
||||
concepts_yaml = {
|
||||
"concepts": [
|
||||
|
|
|
|||
|
|
@ -0,0 +1,44 @@
|
|||
from pathlib import Path
|
||||
import json
|
||||
|
||||
from didactopus.course_ingest import parse_markdown_course, extract_concept_candidates
|
||||
from didactopus.knowledge_graph import build_knowledge_graph, write_knowledge_graph
|
||||
from didactopus.rule_policy import RuleContext, build_default_rules, run_rules
|
||||
|
||||
|
||||
SAMPLE = """
|
||||
# Sample Course
|
||||
|
||||
## Module 1
|
||||
### Lesson A
|
||||
- Objective: Explain Topic A.
|
||||
- Exercise: Do task A.
|
||||
Topic A body.
|
||||
|
||||
### Lesson B
|
||||
- Objective: Explain Topic B.
|
||||
Lesson B body.
|
||||
"""
|
||||
|
||||
|
||||
def test_build_knowledge_graph_contains_typed_nodes_and_edges(tmp_path: Path) -> None:
    """End-to-end: parse the sample course, run rules, then build and write the graph."""
    course = parse_markdown_course(SAMPLE, "Sample Course")
    concepts = extract_concept_candidates(course)
    ctx = RuleContext(course=course, concepts=concepts)
    run_rules(ctx, build_default_rules())

    payload = build_knowledge_graph(course, ctx.concepts)
    observed_node_types = {node["type"] for node in payload["nodes"]}
    observed_edge_types = {edge["type"] for edge in payload["edges"]}

    assert payload["summary"]["concept_count"] >= 2
    for expected_type in ("source", "lesson", "concept", "assessment_signal"):
        assert expected_type in observed_node_types
    assert "contains_lesson" in observed_edge_types
    assert {"teaches_concept", "supports_concept"} & observed_edge_types

    write_knowledge_graph(course, ctx.concepts, tmp_path)
    written = json.loads((tmp_path / "knowledge_graph.json").read_text(encoding="utf-8"))
    assert written["summary"]["node_count"] >= len(payload["nodes"])
|
||||
|
|
@ -28,11 +28,13 @@ def test_ocw_rolemesh_transcript_demo_writes_artifacts(tmp_path: Path) -> None:
|
|||
assert payload["provider"] == "stub"
|
||||
assert len(payload["transcript"]) >= 16
|
||||
assert len(payload["curriculum_path_titles"]) >= 8
|
||||
assert payload["graph_grounding_summary"]["node_count"] >= 1
|
||||
assert payload["role_fallbacks"] == {}
|
||||
assert payload["status_updates"] == []
|
||||
assert any(turn["speaker"] == "Didactopus Evaluator" for turn in payload["transcript"])
|
||||
assert any("channel" in turn["content"].lower() for turn in payload["transcript"])
|
||||
assert any("thermodynamic" in turn["content"].lower() for turn in payload["transcript"])
|
||||
assert any("supporting lessons" in turn["content"].lower() or "grounding fragments" in turn["content"].lower() for turn in payload["transcript"])
|
||||
assert all(not _looks_truncated(turn["content"]) for turn in payload["transcript"])
|
||||
assert (tmp_path / "rolemesh_transcript.json").exists()
|
||||
assert (tmp_path / "rolemesh_transcript.md").exists()
|
||||
|
|
|
|||
|
|
@ -17,6 +17,9 @@ def test_run_ocw_skill_agent_demo(tmp_path: Path) -> None:
|
|||
assert (tmp_path / "skill_demo.json").exists()
|
||||
assert (tmp_path / "skill_demo.md").exists()
|
||||
assert payload["study_plan"]["steps"]
|
||||
assert payload["study_plan"]["steps"][0]["supporting_lessons"]
|
||||
assert "grounding" in payload["explanation"]
|
||||
assert payload["explanation"]["grounding"]["supporting_lessons"]
|
||||
assert payload["evaluation"]["verdict"] in {"acceptable", "needs_revision"}
|
||||
|
||||
|
||||
|
|
@ -30,4 +33,5 @@ def test_skill_demo_flags_weak_submission() -> None:
|
|||
)
|
||||
|
||||
assert result["verdict"] == "needs_revision"
|
||||
assert result["skill_reference"]["supporting_lessons"]
|
||||
assert "Rework the answer" in result["follow_up"]
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
from pathlib import Path
|
||||
from didactopus.course_ingest import parse_markdown_course, extract_concept_candidates
|
||||
from didactopus.knowledge_graph import write_knowledge_graph
|
||||
from didactopus.rule_policy import RuleContext, build_default_rules, run_rules
|
||||
from didactopus.pack_emitter import build_draft_pack, write_draft_pack
|
||||
from didactopus.pack_emitter import build_draft_pack, write_draft_pack, write_source_corpus
|
||||
|
||||
SAMPLE = '''
|
||||
# Sample Course
|
||||
|
|
@ -20,5 +21,9 @@ def test_emit_pack(tmp_path: Path) -> None:
|
|||
run_rules(ctx, build_default_rules())
|
||||
draft = build_draft_pack(course, ctx.concepts, "Tester", "REVIEW", ctx.review_flags)
|
||||
write_draft_pack(draft, tmp_path)
|
||||
write_source_corpus(course, tmp_path)
|
||||
write_knowledge_graph(course, ctx.concepts, tmp_path)
|
||||
assert (tmp_path / "pack.yaml").exists()
|
||||
assert (tmp_path / "review_report.md").exists()
|
||||
assert (tmp_path / "source_corpus.json").exists()
|
||||
assert (tmp_path / "knowledge_graph.json").exists()
|
||||
|
|
|
|||
Loading…
Reference in New Issue