Made knowledge graphs first-class parts of domain packs.

This commit is contained in:
welsberr 2026-03-16 17:46:33 -04:00
parent 51dd2b29ff
commit 41ca57d60f
16 changed files with 6294 additions and 5 deletions

View File

@ -51,11 +51,17 @@ The pack emitter writes:
- `conflict_report.md` - `conflict_report.md`
- `license_attribution.json` - `license_attribution.json`
- `source_corpus.json` - `source_corpus.json`
- `knowledge_graph.json`
`source_corpus.json` is the main grounded-text artifact. It preserves lesson bodies, objectives, `source_corpus.json` is the main grounded-text artifact. It preserves lesson bodies, objectives,
exercises, and source references from the ingested material so downstream tutoring or evaluation exercises, and source references from the ingested material so downstream tutoring or evaluation
can rely on source-derived text instead of only the distilled concept graph. can rely on source-derived text instead of only the distilled concept graph.
`knowledge_graph.json` is the graph-first artifact. It preserves typed nodes and justified edges
for sources, modules, lessons, concepts, assessment signals, and prerequisite/support relations.
Later Didactopus retrieval and tutoring flows can use this graph to explain why a concept appears,
what supports it, and which source material grounds it.
## Rule layer ## Rule layer
The current default rules: The current default rules:

File diff suppressed because it is too large Load Diff

View File

@ -14,3 +14,4 @@ profile_templates: {}
cross_pack_links: [] cross_pack_links: []
supporting_artifacts: supporting_artifacts:
- source_corpus.json - source_corpus.json
- knowledge_graph.json

File diff suppressed because it is too large Load Diff

View File

@ -14,3 +14,4 @@ profile_templates: {}
cross_pack_links: [] cross_pack_links: []
supporting_artifacts: supporting_artifacts:
- source_corpus.json - source_corpus.json
- knowledge_graph.json

View File

@ -11,6 +11,12 @@
], ],
"concept_count": 34, "concept_count": 34,
"source_fragment_count": 60, "source_fragment_count": 60,
"knowledge_graph_summary": {
"node_count": 98,
"edge_count": 178,
"concept_count": 34,
"source_count": 3
},
"target_concept": "mit-ocw-information-and-entropy::thermodynamics-and-entropy", "target_concept": "mit-ocw-information-and-entropy::thermodynamics-and-entropy",
"curriculum_path": [ "curriculum_path": [
"mit-ocw-information-and-entropy::mit-ocw-6-050j-information-and-entropy-course-home", "mit-ocw-information-and-entropy::mit-ocw-6-050j-information-and-entropy-course-home",

View File

@ -0,0 +1,91 @@
from __future__ import annotations
from dataclasses import dataclass
@dataclass
class GraphBundle:
    """Paired graph-first pack artifacts used by the retrieval helpers.

    Bundles the parsed ``knowledge_graph.json`` payload with the parsed
    ``source_corpus.json`` payload so helpers can cross-reference graph
    nodes/edges with grounded source fragments.
    """

    # Parsed knowledge_graph.json; helpers read its "nodes" and "edges" lists.
    knowledge_graph: dict
    # Parsed source_corpus.json; helpers read its "fragments" list.
    source_corpus: dict
def concept_node_id(concept_id: str) -> str:
    """Return the knowledge-graph node id for a concept id."""
    return "concept::" + concept_id
def _node_index(bundle: GraphBundle) -> dict[str, dict]:
return {node["id"]: node for node in bundle.knowledge_graph.get("nodes", [])}
def _edges(bundle: GraphBundle) -> list[dict]:
return list(bundle.knowledge_graph.get("edges", []))
def get_concept_node(bundle: GraphBundle, concept_id: str) -> dict | None:
    """Look up the node for *concept_id* in the graph, or None if absent."""
    node_id = concept_node_id(concept_id)
    return _node_index(bundle).get(node_id)
def concept_neighborhood(bundle: GraphBundle, concept_id: str) -> dict:
    """Collect a concept's node, its incident edges, and its neighbor nodes.

    Returns a dict with keys: "concept" (the concept node or {}), "incoming"
    and "outgoing" (edge lists touching the concept), plus "incoming_nodes"
    and "outgoing_nodes" (neighbor nodes that actually exist in the graph).
    """
    target_id = concept_node_id(concept_id)
    node_by_id = _node_index(bundle)
    all_edges = _edges(bundle)
    incoming = [edge for edge in all_edges if edge["target"] == target_id]
    outgoing = [edge for edge in all_edges if edge["source"] == target_id]
    # Neighbor-node lists only include nodes present in the graph, so they
    # can be shorter than the corresponding edge lists.
    incoming_nodes = [
        node_by_id[edge["source"]] for edge in incoming if edge["source"] in node_by_id
    ]
    outgoing_nodes = [
        node_by_id[edge["target"]] for edge in outgoing if edge["target"] in node_by_id
    ]
    return {
        "concept": node_by_id.get(target_id, {}),
        "incoming": incoming,
        "outgoing": outgoing,
        "incoming_nodes": incoming_nodes,
        "outgoing_nodes": outgoing_nodes,
    }
def source_fragments_for_concept(bundle: GraphBundle, concept_id: str, limit: int = 3) -> list[dict]:
    """Return up to *limit* corpus fragments from lessons linked to the concept.

    A fragment qualifies when its ``lesson_title`` matches the title of any
    lesson-typed node adjacent to the concept in the graph. Fragments are
    returned in corpus order.
    """
    neighborhood = concept_neighborhood(bundle, concept_id)
    lesson_titles: set[str] = set()
    for node in neighborhood["incoming_nodes"] + neighborhood["outgoing_nodes"]:
        if node.get("type") == "lesson":
            lesson_titles.add(node.get("title", ""))
    selected: list[dict] = []
    for fragment in bundle.source_corpus.get("fragments", []):
        if fragment.get("lesson_title") not in lesson_titles:
            continue
        selected.append(fragment)
        if len(selected) >= limit:
            break
    return selected
def prerequisite_titles(bundle: GraphBundle, concept_id: str) -> list[str]:
    """Return deduplicated titles of prerequisite concepts feeding *concept_id*.

    Fix: the previous implementation zipped ``incoming`` (all incoming edges)
    with ``incoming_nodes`` (only edges whose source node exists in the
    graph). As soon as one edge referenced a missing node, every later
    edge/node pair was misaligned. Here each edge is paired with its source
    node by id instead.
    """
    neighborhood = concept_neighborhood(bundle, concept_id)
    node_by_id = {node["id"]: node for node in neighborhood["incoming_nodes"]}
    titles: list[str] = []
    seen: set[str] = set()
    for edge in neighborhood["incoming"]:
        if edge.get("type") != "prerequisite":
            continue
        node = node_by_id.get(edge["source"])
        if node is None:
            # Edge references a node that is absent from the graph; skip it.
            continue
        title = node.get("title", node.get("id", ""))
        if title not in seen:
            seen.add(title)
            titles.append(title)
    return titles
def lesson_titles_for_concept(bundle: GraphBundle, concept_id: str) -> list[str]:
    """Return deduplicated titles of lessons that support or teach the concept.

    Fix: the previous implementation zipped ``incoming`` (all incoming edges)
    with ``incoming_nodes`` (only edges whose source node exists in the
    graph), which misaligned pairs whenever an edge referenced a missing
    node. Here each edge is paired with its source node by id instead.
    """
    neighborhood = concept_neighborhood(bundle, concept_id)
    node_by_id = {node["id"]: node for node in neighborhood["incoming_nodes"]}
    titles: list[str] = []
    seen: set[str] = set()
    for edge in neighborhood["incoming"]:
        if edge.get("type") not in {"supports_concept", "teaches_concept"}:
            continue
        node = node_by_id.get(edge["source"])
        if node is None or node.get("type") != "lesson":
            continue
        title = node.get("title", node.get("id", ""))
        if title not in seen:
            seen.add(title)
            titles.append(title)
    return titles

View File

@ -0,0 +1,219 @@
from __future__ import annotations
import json
import re
from pathlib import Path
from .course_schema import ConceptCandidate, NormalizedCourse
def _slugify(text: str) -> str:
cleaned = re.sub(r"[^a-zA-Z0-9]+", "-", text.strip().lower()).strip("-")
return cleaned or "untitled"
def _source_node_id(source_path: str) -> str:
    """Stable graph node id for an ingested source path."""
    return "source::" + _slugify(source_path)
def _module_node_id(module_title: str) -> str:
    """Stable graph node id for a module title."""
    return "module::" + _slugify(module_title)
def _lesson_node_id(module_title: str, lesson_title: str) -> str:
    """Stable graph node id for a lesson, namespaced by its module."""
    return "::".join(("lesson", _slugify(module_title), _slugify(lesson_title)))
def _concept_node_id(concept_id: str) -> str:
return f"concept::{concept_id}"
def _signal_node_id(kind: str, lesson_title: str, idx: int) -> str:
    """Stable graph node id for an assessment signal (objective/exercise)."""
    return "::".join((kind, _slugify(lesson_title), str(idx)))
def _add_node(nodes: dict[str, dict], node_id: str, node_type: str, **attrs) -> None:
node = nodes.setdefault(node_id, {"id": node_id, "type": node_type})
for key, value in attrs.items():
if value not in (None, "", [], {}):
node[key] = value
def _add_edge(edges: list[dict], source: str, target: str, edge_type: str, justification: str, provenance: list[str] | None = None, confidence: float = 1.0) -> None:
edges.append(
{
"source": source,
"target": target,
"type": edge_type,
"justification": justification,
"provenance": list(provenance or []),
"confidence": confidence,
}
)
def build_knowledge_graph(course: NormalizedCourse, concepts: list[ConceptCandidate]) -> dict:
    """Build the graph-first pack artifact from a normalized course.

    Emits typed nodes (source, concept, module, lesson, assessment_signal)
    and justified edges, plus a summary block with node/edge/concept/source
    counts. Node insertion order is: sources, concepts, then course
    structure — lesson->concept edges rely on concept nodes existing first.

    Fixes over the previous version: removes the unused ``concept_ids`` set
    and the unused ``lesson_sources`` list (which also performed a wasted
    full module/lesson scan per source lesson), and splits the monolithic
    body into focused helpers.
    """
    nodes: dict[str, dict] = {}
    edges: list[dict] = []
    _add_source_nodes(course, nodes)
    _add_concept_nodes(concepts, nodes)
    _add_structure(course, nodes, edges)
    _link_concepts(course, concepts, nodes, edges)
    return {
        "course_title": course.title,
        "rights_note": course.rights_note,
        "summary": {
            "node_count": len(nodes),
            "edge_count": len(edges),
            "concept_count": len(concepts),
            "source_count": len(course.source_records),
        },
        "nodes": list(nodes.values()),
        "edges": edges,
    }


def _add_source_nodes(course: NormalizedCourse, nodes: dict[str, dict]) -> None:
    """Register one typed node per ingested source record."""
    for source in course.source_records:
        _add_node(
            nodes,
            _source_node_id(source.source_path),
            "source",
            title=source.title or source.source_path,
            source_path=source.source_path,
            source_type=source.source_type,
            # Older source records may predate the metadata field.
            metadata=getattr(source, "metadata", {}),
        )


def _add_concept_nodes(concepts: list[ConceptCandidate], nodes: dict[str, dict]) -> None:
    """Register one typed node per distilled concept candidate."""
    for concept in concepts:
        _add_node(
            nodes,
            _concept_node_id(concept.id),
            "concept",
            title=concept.title,
            description=concept.description,
            source_modules=list(concept.source_modules),
            source_lessons=list(concept.source_lessons),
            source_courses=list(concept.source_courses),
            mastery_signals=list(concept.mastery_signals),
        )


def _add_structure(course: NormalizedCourse, nodes: dict[str, dict], edges: list[dict]) -> None:
    """Add module/lesson nodes and each lesson's subgraph of signals and links."""
    for module in course.modules:
        module_id = _module_node_id(module.title)
        _add_node(nodes, module_id, "module", title=module.title)
        for lesson in module.lessons:
            _add_lesson_subgraph(module.title, lesson, module_id, nodes, edges)


def _add_lesson_subgraph(module_title: str, lesson, module_id: str, nodes: dict[str, dict], edges: list[dict]) -> None:
    """Add one lesson node, its assessment signals, and its justified edges."""
    lesson_id = _lesson_node_id(module_title, lesson.title)
    _add_node(
        nodes,
        lesson_id,
        "lesson",
        title=lesson.title,
        module_title=module_title,
        body=lesson.body,
        source_refs=list(lesson.source_refs),
        key_terms=list(lesson.key_terms),
    )
    _add_edge(
        edges,
        module_id,
        lesson_id,
        "contains_lesson",
        justification=f"Lesson '{lesson.title}' appears under module '{module_title}'.",
        provenance=list(lesson.source_refs),
    )
    # Ground the lesson in each source it was ingested from (when known).
    for source_ref in lesson.source_refs:
        source_id = _source_node_id(source_ref)
        if source_id in nodes:
            _add_edge(
                edges,
                source_id,
                lesson_id,
                "derived_lesson",
                justification=f"Lesson '{lesson.title}' was ingested from source '{source_ref}'.",
                provenance=[source_ref],
            )
    for idx, objective in enumerate(lesson.objectives, start=1):
        objective_id = _signal_node_id("objective", lesson.title, idx)
        _add_node(nodes, objective_id, "assessment_signal", title=objective, signal_kind="objective")
        _add_edge(
            edges,
            lesson_id,
            objective_id,
            "has_objective",
            justification=f"Objective {idx} was extracted from lesson '{lesson.title}'.",
            provenance=list(lesson.source_refs),
        )
    for idx, exercise in enumerate(lesson.exercises, start=1):
        exercise_id = _signal_node_id("exercise", lesson.title, idx)
        _add_node(nodes, exercise_id, "assessment_signal", title=exercise, signal_kind="exercise")
        _add_edge(
            edges,
            lesson_id,
            exercise_id,
            "has_exercise",
            justification=f"Exercise {idx} was extracted from lesson '{lesson.title}'.",
            provenance=list(lesson.source_refs),
        )
    # A lesson whose slug matches a concept id directly teaches that concept.
    lesson_concept_id = _concept_node_id(_slugify(lesson.title))
    if lesson_concept_id in nodes:
        _add_edge(
            edges,
            lesson_id,
            lesson_concept_id,
            "teaches_concept",
            justification=f"Lesson '{lesson.title}' yields the lesson-level concept '{lesson.title}'.",
            provenance=list(lesson.source_refs),
        )
    # Key terms that slugify to a known concept are weaker "mentions" links.
    for term in lesson.key_terms:
        term_id = _concept_node_id(_slugify(term))
        if term_id in nodes:
            _add_edge(
                edges,
                lesson_id,
                term_id,
                "mentions_concept",
                justification=f"Key term '{term}' was extracted from lesson '{lesson.title}'.",
                provenance=list(lesson.source_refs),
                confidence=0.7,
            )


def _link_concepts(course: NormalizedCourse, concepts: list[ConceptCandidate], nodes: dict[str, dict], edges: list[dict]) -> None:
    """Add prerequisite and lesson-support edges between existing nodes."""
    for concept in concepts:
        concept_node = _concept_node_id(concept.id)
        for prereq in concept.prerequisites:
            prereq_id = _concept_node_id(prereq)
            # Only link prerequisites that survived concept distillation.
            if prereq_id in nodes:
                _add_edge(
                    edges,
                    prereq_id,
                    concept_node,
                    "prerequisite",
                    justification=f"Concept '{concept.title}' depends on prerequisite '{prereq}'.",
                    provenance=list(concept.source_courses),
                    confidence=0.85,
                )
        for lesson_title in concept.source_lessons:
            for module in course.modules:
                for lesson in module.lessons:
                    if lesson.title != lesson_title:
                        continue
                    lesson_id = _lesson_node_id(module.title, lesson.title)
                    if concept_node in nodes and lesson_id in nodes:
                        _add_edge(
                            edges,
                            lesson_id,
                            concept_node,
                            "supports_concept",
                            justification=f"Concept '{concept.title}' was extracted from lesson '{lesson.title}'.",
                            provenance=list(lesson.source_refs),
                            # A lesson whose slug equals the concept id is the
                            # concept's primary source; others support it weakly.
                            confidence=0.9 if concept.id == _slugify(lesson.title) else 0.7,
                        )
def write_knowledge_graph(course: NormalizedCourse, concepts: list[ConceptCandidate], outdir: str | Path) -> None:
    """Serialize the course's knowledge graph to ``<outdir>/knowledge_graph.json``.

    Creates *outdir* (and parents) if needed.
    """
    target_dir = Path(outdir)
    target_dir.mkdir(parents=True, exist_ok=True)
    graph = build_knowledge_graph(course, concepts)
    target_path = target_dir / "knowledge_graph.json"
    target_path.write_text(json.dumps(graph, indent=2), encoding="utf-8")

View File

@ -15,6 +15,7 @@ from .mastery_ledger import (
export_capability_profile_json, export_capability_profile_json,
export_capability_report_markdown, export_capability_report_markdown,
) )
from .knowledge_graph import write_knowledge_graph
from .pack_emitter import build_draft_pack, write_draft_pack, write_source_corpus from .pack_emitter import build_draft_pack, write_draft_pack, write_source_corpus
from .rule_policy import RuleContext, build_default_rules, run_rules from .rule_policy import RuleContext, build_default_rules, run_rules
from .topic_ingest import build_topic_bundle, document_to_course, extract_concept_candidates, merge_courses_into_topic_course from .topic_ingest import build_topic_bundle, document_to_course, extract_concept_candidates, merge_courses_into_topic_course
@ -167,6 +168,7 @@ def run_ocw_information_entropy_demo(
) )
write_draft_pack(draft, pack_dir) write_draft_pack(draft, pack_dir)
write_source_corpus(merged, pack_dir) write_source_corpus(merged, pack_dir)
write_knowledge_graph(merged, ctx.concepts, pack_dir)
if source_inventory.exists(): if source_inventory.exists():
inventory = load_sources(source_inventory) inventory = load_sources(source_inventory)
compliance_manifest = build_pack_compliance_manifest(draft.pack["name"], draft.pack["display_name"], inventory) compliance_manifest = build_pack_compliance_manifest(draft.pack["name"], draft.pack["display_name"], inventory)
@ -210,6 +212,7 @@ def run_ocw_information_entropy_demo(
"review_flags": list(ctx.review_flags), "review_flags": list(ctx.review_flags),
"concept_count": len(ctx.concepts), "concept_count": len(ctx.concepts),
"source_fragment_count": len(json.loads((pack_dir / "source_corpus.json").read_text(encoding="utf-8")).get("fragments", [])), "source_fragment_count": len(json.loads((pack_dir / "source_corpus.json").read_text(encoding="utf-8")).get("fragments", [])),
"knowledge_graph_summary": json.loads((pack_dir / "knowledge_graph.json").read_text(encoding="utf-8")).get("summary", {}),
"target_concept": target_key, "target_concept": target_key,
"curriculum_path": concept_path, "curriculum_path": concept_path,
"mastered_concepts": sorted(state.mastered_concepts), "mastered_concepts": sorted(state.mastered_concepts),

View File

@ -5,6 +5,7 @@ from pathlib import Path
import sys import sys
from .config import load_config from .config import load_config
from .graph_retrieval import lesson_titles_for_concept, prerequisite_titles, source_fragments_for_concept
from .model_provider import ModelProvider from .model_provider import ModelProvider
from .ocw_skill_agent_demo import load_ocw_skill_context from .ocw_skill_agent_demo import load_ocw_skill_context
from .role_prompts import evaluator_system_prompt, learner_system_prompt, mentor_system_prompt, practice_system_prompt from .role_prompts import evaluator_system_prompt, learner_system_prompt, mentor_system_prompt, practice_system_prompt
@ -153,6 +154,34 @@ def _path_titles(context, limit: int | None = None) -> list[str]:
return titles[:limit] if limit is not None else titles return titles[:limit] if limit is not None else titles
def _concept_by_title(context, title: str) -> dict | None:
lowered = title.strip().lower()
for concept in context.concepts:
if str(concept.get("title", "")).strip().lower() == lowered:
return concept
return None
def _grounding_text_for_title(context, title: str) -> str:
    """Build a graph-grounded prompt preamble for the concept named *title*.

    Lists prerequisites, supporting lessons, and up to two grounding
    fragments pulled from the graph bundle. Returns "" when the title does
    not match any known concept.
    """
    concept = _concept_by_title(context, title)
    if concept is None:
        return ""
    concept_id = concept.get("id", "")
    bundle = context.graph_bundle
    prereqs = prerequisite_titles(bundle, concept_id)
    lessons = lesson_titles_for_concept(bundle, concept_id)
    fragments = source_fragments_for_concept(bundle, concept_id, limit=2)
    prereq_text = ", ".join(prereqs) if prereqs else "none explicit"
    lesson_text = ", ".join(lessons) if lessons else title
    lines = [
        f"Concept focus: {title}",
        f"Prerequisites: {prereq_text}",
        f"Supporting lessons: {lesson_text}",
    ]
    fragment_texts = [fragment.get("text", "") for fragment in fragments if fragment.get("text")]
    if fragment_texts:
        lines.append("Grounding fragments:")
        for text in fragment_texts:
            lines.append(f"- {text}")
    return "\n".join(lines)
def _healthy_rolemesh_models(provider: ModelProvider) -> set[str]: def _healthy_rolemesh_models(provider: ModelProvider) -> set[str]:
config = provider.config config = provider.config
if config.provider.lower() != "rolemesh": if config.provider.lower() != "rolemesh":
@ -240,6 +269,8 @@ def build_ocw_rolemesh_transcript(config_path: str | Path, skill_dir: str | Path
mentor_guidance = _generate_checked( mentor_guidance = _generate_checked(
provider, provider,
f"{_grounding_text_for_title(context, path_titles[1])}\n\n"
f"{_grounding_text_for_title(context, path_titles[2])}\n\n"
"Given the learner reflection, explain the first two concepts to study from the generated path and why. " "Given the learner reflection, explain the first two concepts to study from the generated path and why. "
f"Path reference: {path_titles[:4]}", f"Path reference: {path_titles[:4]}",
role="mentor", role="mentor",
@ -254,6 +285,8 @@ def build_ocw_rolemesh_transcript(config_path: str | Path, skill_dir: str | Path
practice_task = _generate_checked( practice_task = _generate_checked(
provider, provider,
f"{_grounding_text_for_title(context, path_titles[1])}\n\n"
f"{_grounding_text_for_title(context, path_titles[2])}\n\n"
"Generate one short practice task that forces the learner to connect counting/probability with Shannon entropy, " "Generate one short practice task that forces the learner to connect counting/probability with Shannon entropy, "
"without giving away the full answer.", "without giving away the full answer.",
role="practice", role="practice",
@ -282,6 +315,7 @@ def build_ocw_rolemesh_transcript(config_path: str | Path, skill_dir: str | Path
evaluator_feedback = _generate_checked( evaluator_feedback = _generate_checked(
provider, provider,
f"{_grounding_text_for_title(context, path_titles[2])}\n\n"
"Evaluate this learner attempt for correctness, explanation quality, and limitations. " "Evaluate this learner attempt for correctness, explanation quality, and limitations. "
f"Task: {practice_task}\nAttempt: {learner_attempt}", f"Task: {practice_task}\nAttempt: {learner_attempt}",
role="evaluator", role="evaluator",
@ -296,6 +330,7 @@ def build_ocw_rolemesh_transcript(config_path: str | Path, skill_dir: str | Path
mentor_next_step = _generate_checked( mentor_next_step = _generate_checked(
provider, provider,
f"{_grounding_text_for_title(context, 'Channel Capacity')}\n\n"
"Given the evaluator feedback, tell the learner what to do next before moving on to channel capacity. " "Given the evaluator feedback, tell the learner what to do next before moving on to channel capacity. "
"Use the course path to show what comes next.", "Use the course path to show what comes next.",
role="mentor", role="mentor",
@ -313,18 +348,21 @@ def build_ocw_rolemesh_transcript(config_path: str | Path, skill_dir: str | Path
"topic": "Channel Capacity", "topic": "Channel Capacity",
"path_slice": path_titles[4:7] or path_titles, "path_slice": path_titles[4:7] or path_titles,
"practice_anchor": "binary symmetric channel", "practice_anchor": "binary symmetric channel",
"grounding_title": "Channel Capacity",
"required_terms": ["channel", "capacity", "entropy", "noise"], "required_terms": ["channel", "capacity", "entropy", "noise"],
}, },
{ {
"topic": "Coding and Compression", "topic": "Coding and Compression",
"path_slice": path_titles[5:9] or path_titles, "path_slice": path_titles[5:9] or path_titles,
"practice_anchor": "compression and error-correcting code", "practice_anchor": "compression and error-correcting code",
"grounding_title": "Source Coding and Compression",
"required_terms": ["coding", "compression", "redundancy", "error"], "required_terms": ["coding", "compression", "redundancy", "error"],
}, },
{ {
"topic": "Thermodynamic Entropy and Synthesis", "topic": "Thermodynamic Entropy and Synthesis",
"path_slice": path_titles[8:] or path_titles, "path_slice": path_titles[8:] or path_titles,
"practice_anchor": "thermodynamic entropy", "practice_anchor": "thermodynamic entropy",
"grounding_title": "Thermodynamics and Entropy",
"required_terms": ["thermodynamic", "entropy", "information", "physical"], "required_terms": ["thermodynamic", "entropy", "information", "physical"],
}, },
] ]
@ -332,6 +370,7 @@ def build_ocw_rolemesh_transcript(config_path: str | Path, skill_dir: str | Path
for stage in stage_specs: for stage in stage_specs:
mentor_stage = _generate_checked( mentor_stage = _generate_checked(
provider, provider,
f"{_grounding_text_for_title(context, stage['grounding_title'])}\n\n"
f"The learner is continuing through the MIT OCW Information and Entropy course. " f"The learner is continuing through the MIT OCW Information and Entropy course. "
f"Bridge from the previous work into {stage['topic']}. " f"Bridge from the previous work into {stage['topic']}. "
f"Reference this path segment: {stage['path_slice']}. " f"Reference this path segment: {stage['path_slice']}. "
@ -362,6 +401,7 @@ def build_ocw_rolemesh_transcript(config_path: str | Path, skill_dir: str | Path
practice_stage = _generate_checked( practice_stage = _generate_checked(
provider, provider,
f"{_grounding_text_for_title(context, stage['grounding_title'])}\n\n"
f"Create one short reasoning task about {stage['practice_anchor']} for the learner. " f"Create one short reasoning task about {stage['practice_anchor']} for the learner. "
"Keep it course-relevant and do not provide the full solution.", "Keep it course-relevant and do not provide the full solution.",
role="practice", role="practice",
@ -376,6 +416,7 @@ def build_ocw_rolemesh_transcript(config_path: str | Path, skill_dir: str | Path
evaluator_stage = _generate_checked( evaluator_stage = _generate_checked(
provider, provider,
f"{_grounding_text_for_title(context, stage['grounding_title'])}\n\n"
f"Give short evaluator feedback on this learner reflection in the context of {stage['topic']}: " f"Give short evaluator feedback on this learner reflection in the context of {stage['topic']}: "
f"{learner_stage}\nTask context: {practice_stage}", f"{learner_stage}\nTask context: {practice_stage}",
role="evaluator", role="evaluator",
@ -393,6 +434,7 @@ def build_ocw_rolemesh_transcript(config_path: str | Path, skill_dir: str | Path
"skill": context.skill_name, "skill": context.skill_name,
"course": context.pack.get("display_name", context.pack.get("name", "")), "course": context.pack.get("display_name", context.pack.get("name", "")),
"curriculum_path_titles": path_titles, "curriculum_path_titles": path_titles,
"graph_grounding_summary": context.run_summary.get("knowledge_graph_summary", {}),
"role_fallbacks": role_fallbacks, "role_fallbacks": role_fallbacks,
"status_updates": status_updates, "status_updates": status_updates,
"transcript": turns, "transcript": turns,

View File

@ -7,6 +7,7 @@ from pathlib import Path
import yaml import yaml
from .evaluator_pipeline import CritiqueEvaluator, LearnerAttempt, RubricEvaluator, SymbolicRuleEvaluator, aggregate, run_pipeline from .evaluator_pipeline import CritiqueEvaluator, LearnerAttempt, RubricEvaluator, SymbolicRuleEvaluator, aggregate, run_pipeline
from .graph_retrieval import GraphBundle, lesson_titles_for_concept, prerequisite_titles, source_fragments_for_concept
@dataclass @dataclass
@ -17,6 +18,7 @@ class SkillContext:
capability_summary: str capability_summary: str
pack: dict pack: dict
concepts: list[dict] concepts: list[dict]
graph_bundle: GraphBundle
capability_profile: dict capability_profile: dict
run_summary: dict run_summary: dict
@ -46,6 +48,10 @@ def load_ocw_skill_context(skill_dir: str | Path) -> SkillContext:
capability_summary=(skill_dir / "references" / "generated-capability-summary.md").read_text(encoding="utf-8"), capability_summary=(skill_dir / "references" / "generated-capability-summary.md").read_text(encoding="utf-8"),
pack=yaml.safe_load((pack_dir / "pack.yaml").read_text(encoding="utf-8")) or {}, pack=yaml.safe_load((pack_dir / "pack.yaml").read_text(encoding="utf-8")) or {},
concepts=(yaml.safe_load((pack_dir / "concepts.yaml").read_text(encoding="utf-8")) or {}).get("concepts", []), concepts=(yaml.safe_load((pack_dir / "concepts.yaml").read_text(encoding="utf-8")) or {}).get("concepts", []),
graph_bundle=GraphBundle(
knowledge_graph=json.loads((pack_dir / "knowledge_graph.json").read_text(encoding="utf-8")),
source_corpus=json.loads((pack_dir / "source_corpus.json").read_text(encoding="utf-8")),
),
capability_profile=json.loads((run_dir / "capability_profile.json").read_text(encoding="utf-8")), capability_profile=json.loads((run_dir / "capability_profile.json").read_text(encoding="utf-8")),
run_summary=json.loads((run_dir / "run_summary.json").read_text(encoding="utf-8")), run_summary=json.loads((run_dir / "run_summary.json").read_text(encoding="utf-8")),
) )
@ -88,8 +94,16 @@ def build_skill_grounded_study_plan(context: SkillContext, target_task: str) ->
"concept_key": concept_key, "concept_key": concept_key,
"title": concept["title"], "title": concept["title"],
"status": "mastered" if concept_key in context.capability_profile.get("mastered_concepts", []) else "review-needed", "status": "mastered" if concept_key in context.capability_profile.get("mastered_concepts", []) else "review-needed",
"prerequisites": [ "prerequisites": [_concept_key(pack_name, prereq) for prereq in concept.get("prerequisites", [])],
_concept_key(pack_name, prereq) for prereq in concept.get("prerequisites", []) "prerequisite_titles": prerequisite_titles(context.graph_bundle, concept_id),
"supporting_lessons": lesson_titles_for_concept(context.graph_bundle, concept_id),
"source_fragments": [
{
"lesson_title": fragment.get("lesson_title", ""),
"kind": fragment.get("kind", ""),
"text": fragment.get("text", ""),
}
for fragment in source_fragments_for_concept(context.graph_bundle, concept_id, limit=2)
], ],
"recommended_action": ( "recommended_action": (
f"Use {concept['title']} as the primary teaching anchor." f"Use {concept['title']} as the primary teaching anchor."
@ -115,10 +129,14 @@ def build_skill_grounded_explanation(context: SkillContext, concept_id: str) ->
concept_key = _concept_key(pack_name, concept_id) concept_key = _concept_key(pack_name, concept_id)
summary = context.capability_profile.get("evaluator_summary_by_concept", {}).get(concept_key, {}) summary = context.capability_profile.get("evaluator_summary_by_concept", {}).get(concept_key, {})
prereqs = prerequisite_titles(context.graph_bundle, concept_id)
lessons = lesson_titles_for_concept(context.graph_bundle, concept_id)
fragments = source_fragments_for_concept(context.graph_bundle, concept_id, limit=2)
explanation = ( explanation = (
f"{concept['title']} is represented in the Information and Entropy skill as part of a progression from " f"{concept['title']} is represented in the Information and Entropy skill as part of a progression from "
f"foundational probability ideas toward communication limits and physical interpretation. " f"foundational probability ideas toward communication limits and physical interpretation. "
f"It depends on {', '.join(concept.get('prerequisites', []) or ['no explicit prerequisites in the generated pack'])}. " f"It depends on {', '.join(prereqs or concept.get('prerequisites', []) or ['no explicit prerequisites in the generated pack'])}. "
f"It is grounded by lessons such as {', '.join(lessons or [concept['title']])}. "
f"The current demo learner already mastered this concept, with evaluator means {summary}, so the skill can use it as a stable explanation anchor." f"The current demo learner already mastered this concept, with evaluator means {summary}, so the skill can use it as a stable explanation anchor."
) )
return { return {
@ -126,6 +144,17 @@ def build_skill_grounded_explanation(context: SkillContext, concept_id: str) ->
"title": concept["title"], "title": concept["title"],
"explanation": explanation, "explanation": explanation,
"source_description": concept.get("description", ""), "source_description": concept.get("description", ""),
"grounding": {
"supporting_lessons": lessons,
"source_fragments": [
{
"lesson_title": fragment.get("lesson_title", ""),
"kind": fragment.get("kind", ""),
"text": fragment.get("text", ""),
}
for fragment in fragments
],
},
} }
@ -154,6 +183,7 @@ def evaluate_submission_with_skill(context: SkillContext, concept_id: str, submi
"skill_reference": { "skill_reference": {
"skill_name": context.skill_name, "skill_name": context.skill_name,
"mastered_by_demo_agent": mastered_reference, "mastered_by_demo_agent": mastered_reference,
"supporting_lessons": lesson_titles_for_concept(context.graph_bundle, concept_id),
}, },
"follow_up": ( "follow_up": (
"Extend the answer with an explicit limitation or assumption." "Extend the answer with an explicit limitation or assumption."
@ -205,6 +235,7 @@ def run_ocw_skill_agent_demo(skill_dir: str | Path, out_dir: str | Path) -> dict
"", "",
"## Explanation Demo", "## Explanation Demo",
explanation["explanation"], explanation["explanation"],
f"- Supporting lessons: {explanation['grounding']['supporting_lessons']}",
"", "",
"## Evaluation Demo", "## Evaluation Demo",
f"- Verdict: {evaluation['verdict']}", f"- Verdict: {evaluation['verdict']}",

View File

@ -87,7 +87,7 @@ def build_draft_pack(
"overrides": [], "overrides": [],
"profile_templates": {}, "profile_templates": {},
"cross_pack_links": [], "cross_pack_links": [],
"supporting_artifacts": ["source_corpus.json"], "supporting_artifacts": ["source_corpus.json", "knowledge_graph.json"],
} }
concepts_yaml = { concepts_yaml = {
"concepts": [ "concepts": [

View File

@ -0,0 +1,44 @@
from pathlib import Path
import json
from didactopus.course_ingest import parse_markdown_course, extract_concept_candidates
from didactopus.knowledge_graph import build_knowledge_graph, write_knowledge_graph
from didactopus.rule_policy import RuleContext, build_default_rules, run_rules
# Minimal markdown course fixture: one module with two lessons, one
# objective each and a single exercise, enough to exercise every node type.
SAMPLE = """
# Sample Course
## Module 1
### Lesson A
- Objective: Explain Topic A.
- Exercise: Do task A.
Topic A body.
### Lesson B
- Objective: Explain Topic B.
Lesson B body.
"""
def test_build_knowledge_graph_contains_typed_nodes_and_edges(tmp_path: Path) -> None:
    """The builder emits typed nodes/edges and the writer round-trips them."""
    course = parse_markdown_course(SAMPLE, "Sample Course")
    candidates = extract_concept_candidates(course)
    ctx = RuleContext(course=course, concepts=candidates)
    run_rules(ctx, build_default_rules())

    graph = build_knowledge_graph(course, ctx.concepts)
    observed_node_types = {node["type"] for node in graph["nodes"]}
    observed_edge_types = {edge["type"] for edge in graph["edges"]}

    assert graph["summary"]["concept_count"] >= 2
    for expected_type in ("source", "lesson", "concept", "assessment_signal"):
        assert expected_type in observed_node_types
    assert "contains_lesson" in observed_edge_types
    assert observed_edge_types & {"teaches_concept", "supports_concept"}

    write_knowledge_graph(course, ctx.concepts, tmp_path)
    round_tripped = json.loads((tmp_path / "knowledge_graph.json").read_text(encoding="utf-8"))
    assert round_tripped["summary"]["node_count"] >= len(graph["nodes"])

View File

@ -28,11 +28,13 @@ def test_ocw_rolemesh_transcript_demo_writes_artifacts(tmp_path: Path) -> None:
assert payload["provider"] == "stub" assert payload["provider"] == "stub"
assert len(payload["transcript"]) >= 16 assert len(payload["transcript"]) >= 16
assert len(payload["curriculum_path_titles"]) >= 8 assert len(payload["curriculum_path_titles"]) >= 8
assert payload["graph_grounding_summary"]["node_count"] >= 1
assert payload["role_fallbacks"] == {} assert payload["role_fallbacks"] == {}
assert payload["status_updates"] == [] assert payload["status_updates"] == []
assert any(turn["speaker"] == "Didactopus Evaluator" for turn in payload["transcript"]) assert any(turn["speaker"] == "Didactopus Evaluator" for turn in payload["transcript"])
assert any("channel" in turn["content"].lower() for turn in payload["transcript"]) assert any("channel" in turn["content"].lower() for turn in payload["transcript"])
assert any("thermodynamic" in turn["content"].lower() for turn in payload["transcript"]) assert any("thermodynamic" in turn["content"].lower() for turn in payload["transcript"])
assert any("supporting lessons" in turn["content"].lower() or "grounding fragments" in turn["content"].lower() for turn in payload["transcript"])
assert all(not _looks_truncated(turn["content"]) for turn in payload["transcript"]) assert all(not _looks_truncated(turn["content"]) for turn in payload["transcript"])
assert (tmp_path / "rolemesh_transcript.json").exists() assert (tmp_path / "rolemesh_transcript.json").exists()
assert (tmp_path / "rolemesh_transcript.md").exists() assert (tmp_path / "rolemesh_transcript.md").exists()

View File

@ -17,6 +17,9 @@ def test_run_ocw_skill_agent_demo(tmp_path: Path) -> None:
assert (tmp_path / "skill_demo.json").exists() assert (tmp_path / "skill_demo.json").exists()
assert (tmp_path / "skill_demo.md").exists() assert (tmp_path / "skill_demo.md").exists()
assert payload["study_plan"]["steps"] assert payload["study_plan"]["steps"]
assert payload["study_plan"]["steps"][0]["supporting_lessons"]
assert "grounding" in payload["explanation"]
assert payload["explanation"]["grounding"]["supporting_lessons"]
assert payload["evaluation"]["verdict"] in {"acceptable", "needs_revision"} assert payload["evaluation"]["verdict"] in {"acceptable", "needs_revision"}
@ -30,4 +33,5 @@ def test_skill_demo_flags_weak_submission() -> None:
) )
assert result["verdict"] == "needs_revision" assert result["verdict"] == "needs_revision"
assert result["skill_reference"]["supporting_lessons"]
assert "Rework the answer" in result["follow_up"] assert "Rework the answer" in result["follow_up"]

View File

@ -1,7 +1,8 @@
from pathlib import Path from pathlib import Path
from didactopus.course_ingest import parse_markdown_course, extract_concept_candidates from didactopus.course_ingest import parse_markdown_course, extract_concept_candidates
from didactopus.knowledge_graph import write_knowledge_graph
from didactopus.rule_policy import RuleContext, build_default_rules, run_rules from didactopus.rule_policy import RuleContext, build_default_rules, run_rules
from didactopus.pack_emitter import build_draft_pack, write_draft_pack from didactopus.pack_emitter import build_draft_pack, write_draft_pack, write_source_corpus
SAMPLE = ''' SAMPLE = '''
# Sample Course # Sample Course
@ -20,5 +21,9 @@ def test_emit_pack(tmp_path: Path) -> None:
run_rules(ctx, build_default_rules()) run_rules(ctx, build_default_rules())
draft = build_draft_pack(course, ctx.concepts, "Tester", "REVIEW", ctx.review_flags) draft = build_draft_pack(course, ctx.concepts, "Tester", "REVIEW", ctx.review_flags)
write_draft_pack(draft, tmp_path) write_draft_pack(draft, tmp_path)
write_source_corpus(course, tmp_path)
write_knowledge_graph(course, ctx.concepts, tmp_path)
assert (tmp_path / "pack.yaml").exists() assert (tmp_path / "pack.yaml").exists()
assert (tmp_path / "review_report.md").exists() assert (tmp_path / "review_report.md").exists()
assert (tmp_path / "source_corpus.json").exists()
assert (tmp_path / "knowledge_graph.json").exists()