From 8b4359f4cce1fa4a9d33994e21f586a8c9128958 Mon Sep 17 00:00:00 2001 From: welsberr Date: Fri, 13 Mar 2026 06:22:39 -0400 Subject: [PATCH] Added course ingestion pipeline. --- README.md | 35 +++++ configs/config.example.yaml | 27 ++-- docs/course-to-pack.md | 35 +++++ docs/faq.md | 47 +++---- examples/generated_pack/concepts.yaml | 35 +++++ .../generated_pack/license_attribution.json | 5 + examples/generated_pack/pack.yaml | 13 ++ examples/generated_pack/projects.yaml | 7 + examples/generated_pack/review_report.md | 3 + examples/generated_pack/roadmap.yaml | 17 +++ examples/generated_pack/rubrics.yaml | 6 + examples/sample_course.md | 23 ++++ pyproject.toml | 11 +- src/didactopus/config.py | 46 ++----- src/didactopus/course_ingest.py | 128 ++++++++++++++++++ src/didactopus/course_schema.py | 44 ++++++ src/didactopus/main.py | 84 +++++++----- src/didactopus/pack_emitter.py | 78 +++++++++++ src/didactopus/rule_policy.py | 83 ++++++++++++ tests/test_course_ingest.py | 26 ++++ tests/test_pack_emitter.py | 24 ++++ tests/test_rule_policy.py | 24 ++++ 22 files changed, 683 insertions(+), 118 deletions(-) create mode 100644 docs/course-to-pack.md create mode 100644 examples/generated_pack/concepts.yaml create mode 100644 examples/generated_pack/license_attribution.json create mode 100644 examples/generated_pack/pack.yaml create mode 100644 examples/generated_pack/projects.yaml create mode 100644 examples/generated_pack/review_report.md create mode 100644 examples/generated_pack/roadmap.yaml create mode 100644 examples/generated_pack/rubrics.yaml create mode 100644 examples/sample_course.md create mode 100644 src/didactopus/course_ingest.py create mode 100644 src/didactopus/course_schema.py create mode 100644 src/didactopus/pack_emitter.py create mode 100644 src/didactopus/rule_policy.py create mode 100644 tests/test_course_ingest.py create mode 100644 tests/test_pack_emitter.py create mode 100644 tests/test_rule_policy.py diff --git a/README.md b/README.md index 5f56f4a..0964351 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,40 @@ ## Recent revisions +### Course Ingestion Pipeline + +This revision adds a **Course-to-Pack Ingestion Pipeline** plus a **stable rule-policy adapter layer**. + +The design goal is to turn open or user-supplied course materials into draft +Didactopus domain packs without introducing a brittle external rule-engine dependency. + +#### Why no third-party rule engine here? + +To minimize dependency risk, this scaffold uses a small declarative rule-policy +adapter implemented in pure Python and standard-library data structures. + +That gives Didactopus: +- portable rules +- inspectable rule definitions +- deterministic behavior +- zero extra runtime dependency for policy evaluation + +If a stronger rule engine is needed later, this adapter can remain the stable API surface. + +#### What is included + +- normalized course schema +- Markdown/HTML-ish text ingestion adapter +- module / lesson / objective extraction +- concept candidate extraction +- prerequisite guess generation +- rule-policy adapter +- draft pack emitter +- review report generation +- sample course input +- sample generated pack outputs + + ### Mastery Ledger This revision adds a **Mastery Ledger + Capability Export** layer. @@ -147,3 +181,4 @@ didactopus/ └── tests/ ``` + diff --git a/configs/config.example.yaml b/configs/config.example.yaml index 86617eb..6f0f9d8 100644 --- a/configs/config.example.yaml +++ b/configs/config.example.yaml @@ -1,18 +1,11 @@ -model_provider: - mode: local_first - local: - backend: ollama - endpoint: http://localhost:11434 - model_name: llama3.1:8b +course_ingest: + default_pack_author: "Wesley R. Elsberry" + default_license: "REVIEW-REQUIRED" + min_term_length: 4 + max_terms_per_lesson: 8 -platform: - default_dimension_thresholds: - correctness: 0.8 - explanation: 0.75 - transfer: 0.7 - project_execution: 0.75 - critique: 0.7 - -artifacts: - local_pack_dirs: - - domain-packs +rule_policy: + enable_prerequisite_order_rule: true + enable_duplicate_term_merge_rule: true + enable_project_detection_rule: true + enable_review_flags: true diff --git a/docs/course-to-pack.md b/docs/course-to-pack.md new file mode 100644 index 0000000..d679395 --- /dev/null +++ b/docs/course-to-pack.md @@ -0,0 +1,35 @@ +# Course-to-Pack Ingestion Pipeline + +The course-to-pack pipeline transforms educational material into Didactopus-native artifacts. + +## Inputs + +Typical sources: +- syllabus text +- lesson outlines +- markdown notes +- HTML course pages +- assignment sheets +- quiz prompts +- lecture transcripts + +## Normalized intermediate structure + +The pipeline builds a `NormalizedCourse` object containing: +- title +- source metadata +- modules +- lessons +- learning objectives +- exercises +- key terms +- project prompts + +## Rule-policy adapter + +The pipeline includes a small rule layer for stable policy transforms such as: +- suggest prerequisites from ordering +- merge repeated key-term candidates +- flag modules with no exercises +- flag concepts with weak evidence of distinctness +- suggest project concepts from capstone markers diff --git a/docs/faq.md b/docs/faq.md index f4c6e7d..ee84efc 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -1,37 +1,32 @@ # FAQ -## How is an AI student's learned mastery represented? +## Why add course ingestion? -As structured operational state, including: -- mastered concepts -- evaluator summaries -- weak dimensions -- evidence records -- artifacts -- capability export +Because many open or user-supplied courses already encode: +- topic sequencing +- learning objectives +- exercises +- project prompts +- terminology -## Does Didactopus change the AI model weights? +That makes them strong starting material for draft domain packs. -No. In the current architecture, Didactopus supervises and evaluates a learner -agent, but it does not retrain the foundation model. +## Why not just embed all course text? -## How is an AI student ready to be put to work? +Because Didactopus needs structured artifacts: +- concepts +- prerequisites +- projects +- rubrics +- mastery cues -Readiness is represented operationally. A downstream system can inspect: -- which concepts are mastered -- which weak dimensions remain -- what artifacts were produced -- what evaluator evidence supports deployment +A flat embedding store is not enough for mastery planning. -## Is the capability export a certification? +## Why avoid PyKE or another heavy rule engine here? -Not by itself. It is a structured mastery report. In future, it could be combined -with formal evaluators, signed evidence records, and policy rules. +Dependency stability matters. The current rule-policy adapter keeps rules simple, +transparent, and dependency-light. -## Why is this useful? +## Can the rule layer be replaced later? -Because it allows Didactopus outputs to feed into: -- task routing -- portfolio review -- benchmark comparison -- agent deployment policies +Yes. The adapter is designed so a future engine can be plugged in behind the same interface. diff --git a/examples/generated_pack/concepts.yaml b/examples/generated_pack/concepts.yaml new file mode 100644 index 0000000..c5e352c --- /dev/null +++ b/examples/generated_pack/concepts.yaml @@ -0,0 +1,35 @@ +concepts: +- id: descriptive-statistics + title: Descriptive Statistics + description: Descriptive Statistics introduces measures of center and spread. + prerequisites: [] + mastery_signals: + - Explain mean, median, and variance. + mastery_profile: {} +- id: probability-basics + title: Probability Basics + description: Probability Basics introduces events, likelihood, and Bayes-style reasoning. + prerequisites: + - descriptive-statistics + mastery_signals: + - Explain conditional probability. + mastery_profile: {} +- id: prior-and-posterior + title: Prior and Posterior + description: A Prior expresses assumptions before evidence. Posterior reasoning + updates belief after evidence. + prerequisites: + - probability-basics + mastery_signals: + - Explain a prior distribution. + - Explain how evidence changes belief. + mastery_profile: {} +- id: capstone-mini-project + title: Capstone Mini Project + description: This project asks learners to critique assumptions and produce a small + capstone artifact. + prerequisites: + - prior-and-posterior + mastery_signals: + - Write a short project report comparing priors and posteriors. + mastery_profile: {} diff --git a/examples/generated_pack/license_attribution.json b/examples/generated_pack/license_attribution.json new file mode 100644 index 0000000..d7e8530 --- /dev/null +++ b/examples/generated_pack/license_attribution.json @@ -0,0 +1,5 @@ +{ + "source_name": "Sample Course", + "source_url": "", + "rights_note": "REVIEW REQUIRED" +} \ No newline at end of file diff --git a/examples/generated_pack/pack.yaml b/examples/generated_pack/pack.yaml new file mode 100644 index 0000000..f22629e --- /dev/null +++ b/examples/generated_pack/pack.yaml @@ -0,0 +1,13 @@ +name: introductory-bayesian-inference +display_name: Introductory Bayesian Inference +version: 0.1.0-draft +schema_version: '1' +didactopus_min_version: 0.1.0 +didactopus_max_version: 0.9.99 +description: Draft pack generated from sample course. +author: Wesley R. Elsberry +license: REVIEW-REQUIRED +dependencies: [] +overrides: [] +profile_templates: {} +cross_pack_links: [] diff --git a/examples/generated_pack/projects.yaml b/examples/generated_pack/projects.yaml new file mode 100644 index 0000000..833383a --- /dev/null +++ b/examples/generated_pack/projects.yaml @@ -0,0 +1,7 @@ +projects: +- id: capstone-mini-project + title: Capstone Mini Project + difficulty: review-required + prerequisites: [] + deliverables: + - project artifact diff --git a/examples/generated_pack/review_report.md b/examples/generated_pack/review_report.md new file mode 100644 index 0000000..78c50dc --- /dev/null +++ b/examples/generated_pack/review_report.md @@ -0,0 +1,3 @@ +# Review Report + +- Module 'Module 2: Bayesian Updating' appears to contain project-like material; review project extraction. diff --git a/examples/generated_pack/roadmap.yaml b/examples/generated_pack/roadmap.yaml new file mode 100644 index 0000000..fbf0fc7 --- /dev/null +++ b/examples/generated_pack/roadmap.yaml @@ -0,0 +1,17 @@ +stages: +- id: stage-1 + title: 'Module 1: Foundations' + concepts: + - descriptive-statistics + - probability-basics + checkpoint: + - Summarize a small dataset. + - Compute a simple conditional probability. +- id: stage-2 + title: 'Module 2: Bayesian Updating' + concepts: + - prior-and-posterior + - capstone-mini-project + checkpoint: + - Compare prior and posterior beliefs. + - Write a short project report comparing priors and posteriors. diff --git a/examples/generated_pack/rubrics.yaml b/examples/generated_pack/rubrics.yaml new file mode 100644 index 0000000..65aee54 --- /dev/null +++ b/examples/generated_pack/rubrics.yaml @@ -0,0 +1,6 @@ +rubrics: +- id: draft-rubric + title: Draft Rubric + criteria: + - correctness + - explanation diff --git a/examples/sample_course.md b/examples/sample_course.md new file mode 100644 index 0000000..3dd437f --- /dev/null +++ b/examples/sample_course.md @@ -0,0 +1,23 @@ +# Introductory Bayesian Inference + +## Module 1: Foundations +### Descriptive Statistics +- Objective: Explain mean, median, and variance. +- Exercise: Summarize a small dataset. +Descriptive Statistics introduces measures of center and spread. + +### Probability Basics +- Objective: Explain conditional probability. +- Exercise: Compute a simple conditional probability. +Probability Basics introduces events, likelihood, and Bayes-style reasoning. + +## Module 2: Bayesian Updating +### Prior and Posterior +- Objective: Explain a prior distribution. +- Objective: Explain how evidence changes belief. +- Exercise: Compare prior and posterior beliefs. +A Prior expresses assumptions before evidence. Posterior reasoning updates belief after evidence. + +### Capstone Mini Project +- Exercise: Write a short project report comparing priors and posteriors. +This project asks learners to critique assumptions and produce a small capstone artifact. diff --git a/pyproject.toml b/pyproject.toml index 67da187..8ed44ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,21 +5,18 @@ build-backend = "setuptools.build_meta" [project] name = "didactopus" version = "0.1.0" -description = "Didactopus: local-first AI-assisted autodidactic mastery platform" +description = "Didactopus: course-to-pack ingestion scaffold" readme = "README.md" requires-python = ">=3.10" license = {text = "MIT"} authors = [{name = "Wesley R. Elsberry"}] -dependencies = [ - "pydantic>=2.7", - "pyyaml>=6.0", - "networkx>=3.2", -] +dependencies = ["pydantic>=2.7", "pyyaml>=6.0"] + [project.optional-dependencies] dev = ["pytest>=8.0", "ruff>=0.6"] [project.scripts] -didactopus = "didactopus.main:main" +didactopus-course-ingest = "didactopus.main:main" [tool.setuptools.packages.find] where = ["src"] diff --git a/src/didactopus/config.py b/src/didactopus/config.py index 0f52c28..5c1a6cc 100644 --- a/src/didactopus/config.py +++ b/src/didactopus/config.py @@ -3,45 +3,23 @@ from pydantic import BaseModel, Field import yaml -class PlatformConfig(BaseModel): - default_dimension_thresholds: dict[str, float] = Field( - default_factory=lambda: { - "correctness": 0.8, - "explanation": 0.75, - "transfer": 0.7, - "project_execution": 0.75, - "critique": 0.7, - } - ) +class CourseIngestConfig(BaseModel): + default_pack_author: str = "Unknown" + default_license: str = "REVIEW-REQUIRED" + min_term_length: int = 4 + max_terms_per_lesson: int = 8 -class PlannerConfig(BaseModel): - readiness_bonus: float = 2.0 - target_distance_weight: float = 1.0 - weak_dimension_bonus: float = 1.2 - fragile_review_bonus: float = 1.5 - project_unlock_bonus: float = 0.8 - semantic_similarity_weight: float = 1.0 - - -class EvidenceConfig(BaseModel): - resurfacing_threshold: float = 0.55 - confidence_threshold: float = 0.8 - evidence_weights: dict[str, float] = Field( - default_factory=lambda: { - "explanation": 1.0, - "problem": 1.5, - "project": 2.5, - "transfer": 2.0, - } - ) - recent_evidence_multiplier: float = 1.35 +class RulePolicyConfig(BaseModel): + enable_prerequisite_order_rule: bool = True + enable_duplicate_term_merge_rule: bool = True + enable_project_detection_rule: bool = True + enable_review_flags: bool = True class AppConfig(BaseModel): - platform: PlatformConfig = Field(default_factory=PlatformConfig) - planner: PlannerConfig = Field(default_factory=PlannerConfig) - evidence: EvidenceConfig = Field(default_factory=EvidenceConfig) + course_ingest: CourseIngestConfig = Field(default_factory=CourseIngestConfig) + rule_policy: RulePolicyConfig = Field(default_factory=RulePolicyConfig) def load_config(path: str | Path) -> AppConfig: diff --git a/src/didactopus/course_ingest.py b/src/didactopus/course_ingest.py new file mode 100644 index 0000000..b51ec54 --- /dev/null +++ b/src/didactopus/course_ingest.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +import re +from .course_schema import NormalizedCourse, Module, Lesson, ConceptCandidate + +HEADING_RE = re.compile(r"^(#{1,3})\s+(.*)$") +BULLET_RE = re.compile(r"^\s*[-*+]\s+(.*)$") + + +def slugify(text: str) -> str: + cleaned = re.sub(r"[^a-zA-Z0-9]+", "-", text.strip().lower()).strip("-") + return cleaned or "untitled" + + +def extract_key_terms(text: str, min_term_length: int = 4, max_terms: int = 8) -> list[str]: + candidates = re.findall(r"\b[A-Z][A-Za-z0-9\-]{%d,}\b" % (min_term_length - 1), text) + seen = set() + ordered = [] + for term in candidates: + if term not in seen: + seen.add(term) + ordered.append(term) + if len(ordered) >= max_terms: + break + return ordered + + +def parse_markdown_course(text: str, title: str, source_name: str = "", source_url: str = "", rights_note: str = "") -> NormalizedCourse: + lines = text.splitlines() + modules: list[Module] = [] + current_module: Module | None = None + current_lesson: Lesson | None = None + body_buffer: list[str] = [] + + def flush_body(): + nonlocal body_buffer, current_lesson + if current_lesson is not None and body_buffer: + current_lesson.body = "\n".join(body_buffer).strip() + body_buffer = [] + + for line in lines: + m = HEADING_RE.match(line) + if m: + level = len(m.group(1)) + heading = m.group(2).strip() + if level == 1: + continue + elif level == 2: + flush_body() + if current_lesson is not None and current_module is not None: + current_module.lessons.append(current_lesson) + current_lesson = None + if current_module is not None: + modules.append(current_module) + current_module = Module(title=heading, lessons=[]) + elif level == 3: + flush_body() + if current_lesson is not None and current_module is not None: + current_module.lessons.append(current_lesson) + current_lesson = Lesson(title=heading) + continue + + bullet = BULLET_RE.match(line) + if bullet and current_lesson is not None: + item = bullet.group(1).strip() + lower = item.lower() + if lower.startswith("objective:"): + current_lesson.objectives.append(item.split(":", 1)[1].strip()) + elif lower.startswith("exercise:"): + current_lesson.exercises.append(item.split(":", 1)[1].strip()) + else: + body_buffer.append(line) + else: + body_buffer.append(line) + + flush_body() + if current_lesson is not None and current_module is not None: + current_module.lessons.append(current_lesson) + if current_module is not None: + modules.append(current_module) + + course = NormalizedCourse( + title=title, + source_name=source_name, + source_url=source_url, + rights_note=rights_note, + modules=modules, + ) + for module in course.modules: + for lesson in module.lessons: + lesson.key_terms = extract_key_terms(f"{lesson.title}\n{lesson.body}") + return course + + +def extract_concept_candidates(course: NormalizedCourse) -> list[ConceptCandidate]: + concepts: list[ConceptCandidate] = [] + seen_ids: set[str] = set() + for module in course.modules: + for lesson in module.lessons: + title_id = slugify(lesson.title) + if title_id not in seen_ids: + seen_ids.add(title_id) + concepts.append( + ConceptCandidate( + id=title_id, + title=lesson.title, + description=lesson.body[:240].strip(), + source_modules=[module.title], + source_lessons=[lesson.title], + mastery_signals=list(lesson.objectives[:3] or lesson.exercises[:2]), + ) + ) + for term in lesson.key_terms: + term_id = slugify(term) + if term_id in seen_ids: + continue + seen_ids.add(term_id) + concepts.append( + ConceptCandidate( + id=term_id, + title=term, + description=f"Candidate concept extracted from lesson '{lesson.title}'.", + source_modules=[module.title], + source_lessons=[lesson.title], + mastery_signals=list(lesson.objectives[:2]), + ) + ) + return concepts diff --git a/src/didactopus/course_schema.py b/src/didactopus/course_schema.py new file mode 100644 index 0000000..b647a31 --- /dev/null +++ b/src/didactopus/course_schema.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from pydantic import BaseModel, Field + + +class Lesson(BaseModel): + title: str + body: str = "" + objectives: list[str] = Field(default_factory=list) + exercises: list[str] = Field(default_factory=list) + key_terms: list[str] = Field(default_factory=list) + + +class Module(BaseModel): + title: str + lessons: list[Lesson] = Field(default_factory=list) + + +class NormalizedCourse(BaseModel): + title: str + source_name: str = "" + source_url: str = "" + rights_note: str = "" + modules: list[Module] = Field(default_factory=list) + + +class ConceptCandidate(BaseModel): + id: str + title: str + description: str = "" + source_modules: list[str] = Field(default_factory=list) + source_lessons: list[str] = Field(default_factory=list) + prerequisites: list[str] = Field(default_factory=list) + mastery_signals: list[str] = Field(default_factory=list) + + +class DraftPack(BaseModel): + pack: dict + concepts: dict + roadmap: dict + projects: dict + rubrics: dict + review_report: list[str] = Field(default_factory=list) + attribution: dict = Field(default_factory=dict) diff --git a/src/didactopus/main.py b/src/didactopus/main.py index 93af9ca..d113ef5 100644 --- a/src/didactopus/main.py +++ b/src/didactopus/main.py @@ -1,49 +1,65 @@ +from __future__ import annotations + import argparse from pathlib import Path -from .agentic_loop import run_demo_agentic_loop -from .mastery_ledger import ( - build_capability_profile, - export_capability_profile_json, - export_capability_report_markdown, - export_artifact_manifest, -) +from .config import load_config +from .course_ingest import parse_markdown_course, extract_concept_candidates +from .rule_policy import RuleContext, build_default_rules, run_rules +from .pack_emitter import build_draft_pack, write_draft_pack def build_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(description="Didactopus mastery ledger demo") - parser.add_argument("--domain", default="Bayesian inference") - parser.add_argument("--outdir", default="exports") + parser = argparse.ArgumentParser(description="Didactopus course-to-pack ingestion pipeline") + parser.add_argument("--input", required=True) + parser.add_argument("--title", required=True) + parser.add_argument("--source-name", default="") + parser.add_argument("--source-url", default="") + parser.add_argument("--rights-note", default="REVIEW REQUIRED") + parser.add_argument("--output-dir", default="generated-pack") + parser.add_argument("--config", default="configs/config.example.yaml") return parser def main() -> None: args = build_parser().parse_args() - outdir = Path(args.outdir) - outdir.mkdir(parents=True, exist_ok=True) + config = load_config(args.config) + text = Path(args.input).read_text(encoding="utf-8") - concepts = [ - "foundations-statistics::descriptive-statistics", - "foundations-statistics::probability-basics", - "bayes-extension::prior", - "bayes-extension::posterior", - "applied-inference::model-checking", - ] - state = run_demo_agentic_loop(concepts) - profile = build_capability_profile(state, args.domain) + course = parse_markdown_course( + text=text, + title=args.title, + source_name=args.source_name, + source_url=args.source_url, + rights_note=args.rights_note, + ) + concepts = extract_concept_candidates(course) + context = RuleContext(course=course, concepts=concepts) - json_path = outdir / "capability_profile.json" - md_path = outdir / "capability_report.md" - manifest_path = outdir / "artifact_manifest.json" + rules = build_default_rules( + enable_prereq=config.rule_policy.enable_prerequisite_order_rule, + enable_merge=config.rule_policy.enable_duplicate_term_merge_rule, + enable_projects=config.rule_policy.enable_project_detection_rule, + enable_review=config.rule_policy.enable_review_flags, + ) + run_rules(context, rules) - export_capability_profile_json(profile, str(json_path)) - export_capability_report_markdown(profile, str(md_path)) - export_artifact_manifest(profile, str(manifest_path)) + draft = build_draft_pack( + course=course, + concepts=context.concepts, + author=config.course_ingest.default_pack_author, + license_name=config.course_ingest.default_license, + review_flags=context.review_flags, + ) + write_draft_pack(draft, args.output_dir) - print("== Didactopus Mastery Ledger Demo ==") - print(f"Domain: {args.domain}") - print(f"Mastered concepts: {len(profile.mastered_concepts)}") - print(f"Artifacts: {len(profile.artifacts)}") - print(f"Capability profile JSON: {json_path}") - print(f"Capability report Markdown: {md_path}") - print(f"Artifact manifest JSON: {manifest_path}") + print("== Didactopus Course-to-Pack Ingest ==") + print(f"Course: {course.title}") + print(f"Modules: {len(course.modules)}") + print(f"Concept candidates: {len(context.concepts)}") + print(f"Review flags: {len(context.review_flags)}") + print(f"Output dir: {args.output_dir}") + + +if __name__ == "__main__": + main() diff --git a/src/didactopus/pack_emitter.py b/src/didactopus/pack_emitter.py new file mode 100644 index 0000000..8b4d403 --- /dev/null +++ b/src/didactopus/pack_emitter.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +from pathlib import Path +import json +import yaml +from .course_schema import NormalizedCourse, ConceptCandidate, DraftPack + + +def build_draft_pack(course: NormalizedCourse, concepts: list[ConceptCandidate], author: str, license_name: str, review_flags: list[str]) -> DraftPack: + pack_name = course.title.lower().replace(" ", "-") + pack = { + "name": pack_name, + "display_name": course.title, + "version": "0.1.0-draft", + "schema_version": "1", + "didactopus_min_version": "0.1.0", + "didactopus_max_version": "0.9.99", + "description": f"Draft pack generated from course source '{course.source_name or course.title}'.", + "author": author, + "license": license_name, + "dependencies": [], + "overrides": [], + "profile_templates": {}, + "cross_pack_links": [], + } + concepts_yaml = { + "concepts": [ + { + "id": c.id, + "title": c.title, + "description": c.description, + "prerequisites": c.prerequisites, + "mastery_signals": c.mastery_signals, + "mastery_profile": {}, + } + for c in concepts + ] + } + roadmap = { + "stages": [ + { + "id": f"stage-{i+1}", + "title": module.title, + "concepts": [c.id for c in concepts if module.title in c.source_modules and c.title in c.source_lessons], + "checkpoint": [ex for lesson in module.lessons for ex in lesson.exercises[:2]], + } + for i, module in enumerate(course.modules) + ] + } + project_items = [] + for module in course.modules: + for lesson in module.lessons: + text = f"{lesson.title}\n{lesson.body}".lower() + if "project" in text or "capstone" in text: + project_items.append({ + "id": lesson.title.lower().replace(" ", "-"), + "title": lesson.title, + "difficulty": "review-required", + "prerequisites": [], + "deliverables": ["project artifact"], + }) + projects = {"projects": project_items} + rubrics = {"rubrics": [{"id": "draft-rubric", "title": "Draft Rubric", "criteria": ["correctness", "explanation"]}]} + attribution = {"source_name": course.source_name, "source_url": course.source_url, "rights_note": course.rights_note} + return DraftPack(pack=pack, concepts=concepts_yaml, roadmap=roadmap, projects=projects, rubrics=rubrics, review_report=review_flags, attribution=attribution) + + +def write_draft_pack(pack: DraftPack, outdir: str | Path) -> None: + out = Path(outdir) + out.mkdir(parents=True, exist_ok=True) + (out / "pack.yaml").write_text(yaml.safe_dump(pack.pack, sort_keys=False), encoding="utf-8") + (out / "concepts.yaml").write_text(yaml.safe_dump(pack.concepts, sort_keys=False), encoding="utf-8") + (out / "roadmap.yaml").write_text(yaml.safe_dump(pack.roadmap, sort_keys=False), encoding="utf-8") + (out / "projects.yaml").write_text(yaml.safe_dump(pack.projects, sort_keys=False), encoding="utf-8") + (out / "rubrics.yaml").write_text(yaml.safe_dump(pack.rubrics, sort_keys=False), encoding="utf-8") + review_lines = ["# Review Report", ""] + [f"- {flag}" for flag in pack.review_report] if pack.review_report else ["# Review Report", "", "- none"] + (out / "review_report.md").write_text("\n".join(review_lines), encoding="utf-8") + (out / "license_attribution.json").write_text(json.dumps(pack.attribution, indent=2), encoding="utf-8") diff --git a/src/didactopus/rule_policy.py b/src/didactopus/rule_policy.py new file mode 100644 index 0000000..8f7747b --- /dev/null +++ b/src/didactopus/rule_policy.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Callable +from .course_schema import NormalizedCourse, ConceptCandidate + + +@dataclass +class RuleContext: + course: NormalizedCourse + concepts: list[ConceptCandidate] + review_flags: list[str] = field(default_factory=list) + + +@dataclass +class Rule: + name: str + predicate: Callable[[RuleContext], bool] + action: Callable[[RuleContext], None] + + +def order_based_prerequisite_rule(context: RuleContext) -> None: + concept_titles = {c.title: c for c in context.concepts} + previous = None + for module in context.course.modules: + for lesson in module.lessons: + current = concept_titles.get(lesson.title) + if current is not None and previous is not None and previous.id not in current.prerequisites: + current.prerequisites.append(previous.id) + if current is not None: + previous = current + + +def duplicate_term_merge_rule(context: RuleContext) -> None: + seen = {} + deduped = [] + for concept in context.concepts: + key = concept.title.strip().lower() + if key in seen: + seen[key].source_modules.extend(x for x in concept.source_modules if x not in seen[key].source_modules) + seen[key].source_lessons.extend(x for x in concept.source_lessons if x not in seen[key].source_lessons) + if concept.description and len(seen[key].description) < len(concept.description): + seen[key].description = concept.description + else: + seen[key] = concept + deduped.append(concept) + context.concepts[:] = deduped + + +def project_detection_rule(context: RuleContext) -> None: + for module in context.course.modules: + joined = " ".join(lesson.body for lesson in module.lessons).lower() + if "project" in joined or "capstone" in joined: + context.review_flags.append(f"Module '{module.title}' appears to contain project-like material; review project extraction.") + + +def review_flag_rule(context: RuleContext) -> None: + for module in context.course.modules: + if not any(lesson.exercises for lesson in module.lessons): + context.review_flags.append(f"Module '{module.title}' has no explicit exercises; mastery signals may be weak.") + for concept in context.concepts: + if not concept.mastery_signals: + context.review_flags.append(f"Concept '{concept.title}' has no extracted mastery signals; review manually.") + + +def build_default_rules(enable_prereq=True, enable_merge=True, enable_projects=True, enable_review=True) -> list[Rule]: + rules = [] + if enable_prereq: + rules.append(Rule("order_based_prerequisite_rule", lambda ctx: True, order_based_prerequisite_rule)) + if enable_merge: + rules.append(Rule("duplicate_term_merge_rule", lambda ctx: True, duplicate_term_merge_rule)) + if enable_projects: + rules.append(Rule("project_detection_rule", lambda ctx: True, project_detection_rule)) + if enable_review: + rules.append(Rule("review_flag_rule", lambda ctx: True, review_flag_rule)) + return rules + + +def run_rules(context: RuleContext, rules: list[Rule]) -> RuleContext: + for rule in rules: + if rule.predicate(context): + rule.action(context) + return context diff --git a/tests/test_course_ingest.py b/tests/test_course_ingest.py new file mode 100644 index 0000000..5d39d82 --- /dev/null +++ b/tests/test_course_ingest.py @@ -0,0 +1,26 @@ +from didactopus.course_ingest import parse_markdown_course, extract_concept_candidates + +SAMPLE = ''' +# Sample Course + +## Module 1 +### Lesson A +- Objective: Explain Topic A. +- Exercise: Do task A. +Topic A body. + +### Lesson B +- Objective: Explain Topic B. +Topic B body. +''' + +def test_parse_markdown_course() -> None: + course = parse_markdown_course(SAMPLE, "Sample Course") + assert course.title == "Sample Course" + assert len(course.modules) == 1 + assert len(course.modules[0].lessons) == 2 + +def test_extract_concepts() -> None: + course = parse_markdown_course(SAMPLE, "Sample Course") + concepts = extract_concept_candidates(course) + assert len(concepts) >= 2 diff --git a/tests/test_pack_emitter.py b/tests/test_pack_emitter.py new file mode 100644 index 0000000..5db707a --- /dev/null +++ b/tests/test_pack_emitter.py @@ -0,0 +1,24 @@ +from pathlib import Path +from didactopus.course_ingest import parse_markdown_course, extract_concept_candidates +from didactopus.rule_policy import RuleContext, build_default_rules, run_rules +from didactopus.pack_emitter import build_draft_pack, write_draft_pack + +SAMPLE = ''' +# Sample Course + +## Module 1 +### Lesson A +- Objective: Explain Topic A. +- Exercise: Do task A. +Topic A body. +''' + +def test_emit_pack(tmp_path: Path) -> None: + course = parse_markdown_course(SAMPLE, "Sample Course") + concepts = extract_concept_candidates(course) + ctx = RuleContext(course=course, concepts=concepts) + run_rules(ctx, build_default_rules()) + draft = build_draft_pack(course, ctx.concepts, "Tester", "REVIEW", ctx.review_flags) + write_draft_pack(draft, tmp_path) + assert (tmp_path / "pack.yaml").exists() + assert (tmp_path / "review_report.md").exists() diff --git a/tests/test_rule_policy.py b/tests/test_rule_policy.py new file mode 100644 index 0000000..583f751 --- /dev/null +++ b/tests/test_rule_policy.py @@ -0,0 +1,24 @@ +from didactopus.course_ingest import parse_markdown_course, extract_concept_candidates +from didactopus.rule_policy import RuleContext, build_default_rules, run_rules + +SAMPLE = ''' +# Sample Course + +## Module 1 +### Lesson A +- Objective: Explain Topic A. +- Exercise: Do task A. +Topic A body. + +### Lesson B +- Objective: Explain Topic B. +- Exercise: Do task B. +Topic B body. +''' + +def test_rules_run() -> None: + course = parse_markdown_course(SAMPLE, "Sample Course") + concepts = extract_concept_candidates(course) + ctx = RuleContext(course=course, concepts=concepts) + run_rules(ctx, build_default_rules()) + assert len(ctx.concepts) >= 2