diff --git a/README.md b/README.md index 0964351..d35d32c 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,39 @@ ## Recent revisions +### Multi-Source Course Ingestion + +This revision adds a **Multi-Source Course Ingestion Layer**. + +The pipeline can now accept multiple source files representing the same course or +topic domain, normalize them into a shared intermediate representation, merge them, +and emit a single draft Didactopus pack plus a conflict report. + +#### Supported scaffold source types + +Current scaffold adapters: +- Markdown (`.md`) +- Plain text (`.txt`) +- HTML-ish text (`.html`, `.htm`) +- Transcript text (`.transcript.txt`) +- Syllabus text (`.syllabus.txt`) + +This revision is intentionally adapter-oriented, so future PDF, slide, and DOCX +adapters can be added behind the same interface. + +#### What is included + +- multi-source adapter dispatch +- normalized source records +- source merge logic +- cross-source terminology conflict report +- duplicate lesson/title detection +- merged draft pack emission +- merged attribution manifest +- sample multi-source inputs +- sample merged output pack + + ### Course Ingestion Pipeline This revision adds a **Course-to-Pack Ingestion Pipeline** plus a **stable rule-policy adapter layer**. @@ -182,3 +215,4 @@ didactopus/ ``` + diff --git a/configs/config.example.yaml b/configs/config.example.yaml index 6f0f9d8..6047dab 100644 --- a/configs/config.example.yaml +++ b/configs/config.example.yaml @@ -9,3 +9,8 @@ rule_policy: enable_duplicate_term_merge_rule: true enable_project_detection_rule: true enable_review_flags: true + +multisource: + detect_duplicate_lessons: true + detect_term_conflicts: true + merge_same_named_lessons: true diff --git a/docs/faq.md b/docs/faq.md index ee84efc..b5165bf 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -1,32 +1,27 @@ # FAQ -## Why add course ingestion? +## Why multi-source ingestion? 
-Because many open or user-supplied courses already encode: -- topic sequencing -- learning objectives -- exercises -- project prompts -- terminology +Because course structure is usually distributed across several files rather than +perfectly contained in one source. -That makes them strong starting material for draft domain packs. +## What kinds of conflicts can arise? -## Why not just embed all course text? +Common examples: +- the same lesson with slightly different titles +- inconsistent terminology across notes and transcripts +- exercises present in one source but absent in another +- project prompts implied in one file and explicit in another -Because Didactopus needs structured artifacts: +## Does the system resolve all conflicts automatically? + +No. It produces a merged draft pack and a conflict report for human review. + +## Why not rely only on embeddings for this? + +Because Didactopus needs explicit structures such as: - concepts - prerequisites - projects - rubrics -- mastery cues - -A flat embedding store is not enough for mastery planning. - -## Why avoid PyKE or another heavy rule engine here? - -Dependency stability matters. The current rule-policy adapter keeps rules simple, -transparent, and dependency-light. - -## Can the rule layer be replaced later? - -Yes. The adapter is designed so a future engine can be plugged in behind the same interface. +- checkpoints diff --git a/docs/multisource-ingestion.md b/docs/multisource-ingestion.md new file mode 100644 index 0000000..63bc795 --- /dev/null +++ b/docs/multisource-ingestion.md @@ -0,0 +1,34 @@ +# Multi-Source Ingestion + +The multi-source ingestion layer lets Didactopus build one draft domain pack from +several heterogeneous inputs describing the same course or topic. 
+ +## Why this matters + +Real course material is often scattered across: +- syllabus files +- lesson notes +- transcripts +- assignment sheets +- HTML pages +- supplemental markdown + +A single-source parser is too narrow for serious curriculum distillation. + +## Pipeline + +1. detect adapter by file extension or naming convention +2. normalize each source into a `NormalizedSourceRecord` +3. merge sources into a `NormalizedCourse` +4. extract concept candidates +5. run rule-policy passes +6. emit merged draft pack +7. emit conflict report and attribution manifest + +## Conflict report categories + +- duplicate lesson titles across sources +- repeated key terms with different local contexts +- modules with no explicit exercises +- project-like content needing manual review +- lessons with thin mastery signals diff --git a/examples/generated_pack/conflict_report.md b/examples/generated_pack/conflict_report.md new file mode 100644 index 0000000..f816656 --- /dev/null +++ b/examples/generated_pack/conflict_report.md @@ -0,0 +1,3 @@ +# Conflict Report + +- Key term 'prior' appears in multiple lesson contexts: Prior and Posterior diff --git a/examples/generated_pack/license_attribution.json b/examples/generated_pack/license_attribution.json index d7e8530..f8690a4 100644 --- a/examples/generated_pack/license_attribution.json +++ b/examples/generated_pack/license_attribution.json @@ -1,5 +1,20 @@ { - "source_name": "Sample Course", - "source_url": "", - "rights_note": "REVIEW REQUIRED" + "rights_note": "REVIEW REQUIRED", + "sources": [ + { + "source_name": "sample_course_syllabus.syllabus.txt", + "source_type": "syllabus", + "source_path": "examples/sample_course_syllabus.syllabus.txt" + }, + { + "source_name": "sample_course_notes.md", + "source_type": "markdown", + "source_path": "examples/sample_course_notes.md" + }, + { + "source_name": "sample_course_lecture.transcript.txt", + "source_type": "transcript", + "source_path": 
"examples/sample_course_lecture.transcript.txt" + } + ] } \ No newline at end of file diff --git a/examples/generated_pack/pack.yaml b/examples/generated_pack/pack.yaml index f22629e..04d6970 100644 --- a/examples/generated_pack/pack.yaml +++ b/examples/generated_pack/pack.yaml @@ -4,7 +4,8 @@ version: 0.1.0-draft schema_version: '1' didactopus_min_version: 0.1.0 didactopus_max_version: 0.9.99 -description: Draft pack generated from sample course. +description: Draft pack generated from multi-source course inputs for 'Introductory + Bayesian Inference'. author: Wesley R. Elsberry license: REVIEW-REQUIRED dependencies: [] diff --git a/examples/sample_course_lecture.transcript.txt b/examples/sample_course_lecture.transcript.txt new file mode 100644 index 0000000..cb0caba --- /dev/null +++ b/examples/sample_course_lecture.transcript.txt @@ -0,0 +1,5 @@ +# Introductory Bayesian Inference + +## Module 2: Bayesian Updating +### Prior and Posterior +In this lecture we revisit Prior and Posterior and discuss model assumptions, bias, and uncertainty. diff --git a/examples/sample_course_notes.md b/examples/sample_course_notes.md new file mode 100644 index 0000000..e2de2ef --- /dev/null +++ b/examples/sample_course_notes.md @@ -0,0 +1,16 @@ +# Introductory Bayesian Inference + +## Module 1: Foundations +### Descriptive Statistics +Descriptive Statistics introduces measures of center and spread. + +### Probability Basics +Probability Basics introduces events, likelihood, and Bayes-style reasoning. + +## Module 2: Bayesian Updating +### Prior and Posterior +A Prior expresses assumptions before evidence. Posterior reasoning updates belief after evidence. + +### Capstone Mini Project +- Exercise: Write a short project report comparing priors and posteriors. +This project asks learners to critique assumptions and produce a small capstone artifact. 
diff --git a/examples/sample_course_syllabus.syllabus.txt b/examples/sample_course_syllabus.syllabus.txt new file mode 100644 index 0000000..72e30c7 --- /dev/null +++ b/examples/sample_course_syllabus.syllabus.txt @@ -0,0 +1,16 @@ +# Introductory Bayesian Inference + +## Module 1: Foundations +### Descriptive Statistics +- Objective: Explain mean, median, and variance. +- Exercise: Summarize a small dataset. + +### Probability Basics +- Objective: Explain conditional probability. +- Exercise: Compute a simple conditional probability. + +## Module 2: Bayesian Updating +### Prior and Posterior +- Objective: Explain a prior distribution. +- Objective: Explain how evidence changes belief. +- Exercise: Compare prior and posterior beliefs. diff --git a/pyproject.toml b/pyproject.toml index 8ed44ec..0b95f2f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "didactopus" version = "0.1.0" -description = "Didactopus: course-to-pack ingestion scaffold" +description = "Didactopus: multi-source course-to-pack ingestion scaffold" readme = "README.md" requires-python = ">=3.10" license = {text = "MIT"} diff --git a/src/didactopus/config.py b/src/didactopus/config.py index 5c1a6cc..c42b9e7 100644 --- a/src/didactopus/config.py +++ b/src/didactopus/config.py @@ -17,9 +17,16 @@ class RulePolicyConfig(BaseModel): enable_review_flags: bool = True +class MultisourceConfig(BaseModel): + detect_duplicate_lessons: bool = True + detect_term_conflicts: bool = True + merge_same_named_lessons: bool = True + + class AppConfig(BaseModel): course_ingest: CourseIngestConfig = Field(default_factory=CourseIngestConfig) rule_policy: RulePolicyConfig = Field(default_factory=RulePolicyConfig) + multisource: MultisourceConfig = Field(default_factory=MultisourceConfig) def load_config(path: str | Path) -> AppConfig: diff --git a/src/didactopus/conflict_report.py b/src/didactopus/conflict_report.py new file mode 100644 index 
# src/didactopus/conflict_report.py
"""Heuristic detectors that flag parts of a merged course for human review.

Every detector returns a list of human-readable flag strings; an empty list
means nothing was flagged.
"""

from collections import defaultdict
from typing import TYPE_CHECKING

if TYPE_CHECKING:  # used in annotations only, never at runtime
    from .course_schema import ConceptCandidate, NormalizedCourse


def detect_duplicate_lessons(course: "NormalizedCourse") -> list[str]:
    """Flag lesson titles that occur more than once in ``course``.

    Titles are grouped case-insensitively (``str.lower``) and the flag text
    shows the lowercased title.

    Args:
        course: Object exposing ``modules``; each module has ``title`` and
            ``lessons``; each lesson has ``title``.

    Returns:
        One flag per repeated title, in first-seen order.
    """
    modules_by_title: dict[str, list[str]] = defaultdict(list)
    for module in course.modules:
        for lesson in module.lessons:
            modules_by_title[lesson.title.lower()].append(module.title)

    flags: list[str] = []
    for title, module_titles in modules_by_title.items():
        if len(module_titles) < 2:
            continue
        distinct = sorted(set(module_titles))
        if len(distinct) > 1:
            flags.append(f"Lesson title '{title}' appears in multiple modules: {', '.join(distinct)}")
        else:
            # Every occurrence is in the same module, so "multiple modules"
            # would be wrong; report the repeat count instead.
            flags.append(f"Lesson title '{title}' appears {len(module_titles)} times in module: {distinct[0]}")
    return flags


def detect_term_conflicts(course: "NormalizedCourse") -> list[str]:
    """Flag key terms that are attached to more than one distinct lesson title.

    Terms are grouped case-insensitively (``str.lower``); lesson titles are
    compared exactly and listed in sorted order.

    Args:
        course: Object exposing ``modules`` -> ``lessons``; each lesson has
            ``title`` and ``key_terms``.

    Returns:
        One flag per term seen under two or more lesson titles, in
        first-seen order.
    """
    titles_by_term: dict[str, set[str]] = defaultdict(set)
    for module in course.modules:
        for lesson in module.lessons:
            for term in lesson.key_terms:
                titles_by_term[term.lower()].add(lesson.title)

    return [
        f"Key term '{term}' appears in multiple lesson contexts: {', '.join(sorted(titles))}"
        for term, titles in titles_by_term.items()
        if len(titles) > 1
    ]


def detect_thin_concepts(concepts: "list[ConceptCandidate]", min_description_length: int = 20) -> list[str]:
    """Flag concepts that lack mastery signals or have a very short description.

    Args:
        concepts: Objects exposing ``title``, ``mastery_signals`` and
            ``description``.
        min_description_length: A description whose stripped length is below
            this value is flagged as thin. Defaults to 20.

    Returns:
        Up to two flags per concept (missing signals first, then thin
        description), in input order.
    """
    flags: list[str] = []
    for concept in concepts:
        if not concept.mastery_signals:
            flags.append(f"Concept '{concept.title}' has no mastery signals.")
        if len(concept.description.strip()) < min_description_length:
            flags.append(f"Concept '{concept.title}' has a very thin description.")
    return flags
# src/didactopus/course_ingest.py
def detect_source_type(path: "str | Path") -> str:
    """Classify a source file by its file name.

    Only the final path component is inspected and matching is
    case-insensitive.

    Args:
        path: Path (or path string) of the source file. The file is not opened.

    Returns:
        ``"transcript"`` or ``"syllabus"`` for names ending in
        ``.transcript.txt`` / ``.syllabus.txt``; otherwise ``"markdown"``
        (``.md``), ``"html"`` (``.html``, ``.htm``) or ``"text"`` (``.txt``);
        ``"unknown"`` for anything else.
    """
    name = Path(path).name.lower()

    # Compound naming conventions are checked first: a plain suffix lookup
    # would classify "x.transcript.txt" as ordinary text.
    compound_markers = (
        (".transcript.txt", "transcript"),
        (".syllabus.txt", "syllabus"),
    )
    for marker, source_type in compound_markers:
        if name.endswith(marker):
            return source_type

    by_suffix = {
        ".md": "markdown",
        ".html": "html",
        ".htm": "html",
        ".txt": "text",
    }
    return by_suffix.get(Path(name).suffix, "unknown")
# src/didactopus/course_ingest.py
def _title_from_filename(path: Path) -> str:
    """Derive a display title from a file name (underscores/hyphens -> spaces, title-cased)."""
    stem = path.stem
    if path.suffix.lower() == ".txt":
        # "x.transcript.txt" has stem "x.transcript"; drop the naming-convention
        # marker so it does not leak into the title as "X.Transcript".
        for marker in (".transcript", ".syllabus"):
            if stem.lower().endswith(marker) and len(stem) > len(marker):
                stem = stem[: -len(marker)]
                break
    return stem.replace("_", " ").replace("-", " ").title()


def _merge_lesson_into(existing: Lesson, incoming: Lesson) -> None:
    """Fold ``incoming``'s content into ``existing`` in place, skipping what is already there."""
    # Substring test: a body already contained in the merged text is not repeated.
    if incoming.body and incoming.body not in existing.body:
        existing.body = (existing.body + "\n\n" + incoming.body).strip()
    for field_name in ("objectives", "exercises", "key_terms", "source_refs"):
        merged_items = getattr(existing, field_name)
        for item in getattr(incoming, field_name):
            if item not in merged_items:
                merged_items.append(item)


def parse_source_file(path: str | Path, title: str = "") -> NormalizedSourceRecord:
    """Read one source file as UTF-8 and parse it into a source record.

    Every file is handed to ``parse_markdown_like`` whatever its extension.

    Args:
        path: File to read.
        title: Title for the record. When empty, one is inferred from the
            file name.

    Returns:
        The record produced by ``parse_markdown_like``, with ``source_name``
        set to the file name and ``source_path`` to ``str(path)``.

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    p = Path(path)
    text = p.read_text(encoding="utf-8")
    inferred_title = title or _title_from_filename(p)
    return parse_markdown_like(text=text, title=inferred_title, source_name=p.name, source_path=str(p))


def merge_source_records(records: list[NormalizedSourceRecord], course_title: str, rights_note: str = "", merge_same_named_lessons: bool = True) -> NormalizedCourse:
    """Merge per-source records into a single course.

    Modules are matched by exact title and keep first-seen order. The lessons
    placed in the merged course are deep copies, so merging never mutates the
    lessons held by ``records``.

    Args:
        records: Parsed sources, in merge priority order.
        course_title: Title of the resulting course.
        rights_note: Rights note stored on the resulting course.
        merge_same_named_lessons: When true, lessons with the same exact title
            inside a module are folded into one lesson (bodies joined by a
            blank line; objectives, exercises, key terms and source refs
            de-duplicated). When false, all lessons are kept side by side.

    Returns:
        The merged course; ``source_records`` is set to ``records``.
    """
    # Function-scope import keeps this block self-contained.
    from copy import deepcopy

    modules_by_title: dict[str, Module] = {}
    for record in records:
        for module in record.modules:
            target_module = modules_by_title.get(module.title)
            if target_module is None:
                target_module = Module(title=module.title, lessons=[])
                modules_by_title[module.title] = target_module

            if not merge_same_named_lessons:
                target_module.lessons.extend(deepcopy(lesson) for lesson in module.lessons)
                continue

            lessons_by_title = {lesson.title: lesson for lesson in target_module.lessons}
            for lesson in module.lessons:
                existing = lessons_by_title.get(lesson.title)
                if existing is None:
                    merged = deepcopy(lesson)
                    target_module.lessons.append(merged)
                    # Register the new lesson so a later same-titled lesson
                    # from this same module is merged rather than appended.
                    lessons_by_title[merged.title] = merged
                else:
                    _merge_lesson_into(existing, lesson)

    return NormalizedCourse(
        title=course_title,
        rights_note=rights_note,
        modules=list(modules_by_title.values()),
        source_records=records,
    )
a/src/didactopus/course_schema.py b/src/didactopus/course_schema.py index b647a31..ed5803d 100644 --- a/src/didactopus/course_schema.py +++ b/src/didactopus/course_schema.py @@ -9,6 +9,7 @@ class Lesson(BaseModel): objectives: list[str] = Field(default_factory=list) exercises: list[str] = Field(default_factory=list) key_terms: list[str] = Field(default_factory=list) + source_refs: list[str] = Field(default_factory=list) class Module(BaseModel): @@ -16,12 +17,21 @@ class Module(BaseModel): lessons: list[Lesson] = Field(default_factory=list) +class NormalizedSourceRecord(BaseModel): + source_name: str + source_type: str + source_path: str + title: str = "" + modules: list[Module] = Field(default_factory=list) + + class NormalizedCourse(BaseModel): title: str source_name: str = "" source_url: str = "" rights_note: str = "" modules: list[Module] = Field(default_factory=list) + source_records: list[NormalizedSourceRecord] = Field(default_factory=list) class ConceptCandidate(BaseModel): @@ -42,3 +52,4 @@ class DraftPack(BaseModel): rubrics: dict review_report: list[str] = Field(default_factory=list) attribution: dict = Field(default_factory=dict) + conflicts: list[str] = Field(default_factory=list) diff --git a/src/didactopus/main.py b/src/didactopus/main.py index d113ef5..253b5b6 100644 --- a/src/didactopus/main.py +++ b/src/didactopus/main.py @@ -4,17 +4,16 @@ import argparse from pathlib import Path from .config import load_config -from .course_ingest import parse_markdown_course, extract_concept_candidates +from .course_ingest import parse_source_file, merge_source_records, extract_concept_candidates from .rule_policy import RuleContext, build_default_rules, run_rules +from .conflict_report import detect_duplicate_lessons, detect_term_conflicts, detect_thin_concepts from .pack_emitter import build_draft_pack, write_draft_pack def build_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(description="Didactopus course-to-pack ingestion pipeline") - 
parser.add_argument("--input", required=True) - parser.add_argument("--title", required=True) - parser.add_argument("--source-name", default="") - parser.add_argument("--source-url", default="") + parser = argparse.ArgumentParser(description="Didactopus multi-source course-to-pack ingestion pipeline") + parser.add_argument("--inputs", nargs="+", required=True, help="Input source files") + parser.add_argument("--title", required=True, help="Course or topic title") parser.add_argument("--rights-note", default="REVIEW REQUIRED") parser.add_argument("--output-dir", default="generated-pack") parser.add_argument("--config", default="configs/config.example.yaml") @@ -24,14 +23,13 @@ def build_parser() -> argparse.ArgumentParser: def main() -> None: args = build_parser().parse_args() config = load_config(args.config) - text = Path(args.input).read_text(encoding="utf-8") - course = parse_markdown_course( - text=text, - title=args.title, - source_name=args.source_name, - source_url=args.source_url, + records = [parse_source_file(path, title=args.title) for path in args.inputs] + course = merge_source_records( + records=records, + course_title=args.title, rights_note=args.rights_note, + merge_same_named_lessons=config.multisource.merge_same_named_lessons, ) concepts = extract_concept_candidates(course) context = RuleContext(course=course, concepts=concepts) @@ -44,20 +42,30 @@ def main() -> None: ) run_rules(context, rules) + conflicts = [] + if config.multisource.detect_duplicate_lessons: + conflicts.extend(detect_duplicate_lessons(course)) + if config.multisource.detect_term_conflicts: + conflicts.extend(detect_term_conflicts(course)) + conflicts.extend(detect_thin_concepts(context.concepts)) + draft = build_draft_pack( course=course, concepts=context.concepts, author=config.course_ingest.default_pack_author, license_name=config.course_ingest.default_license, review_flags=context.review_flags, + conflicts=conflicts, ) write_draft_pack(draft, args.output_dir) - print("== 
Didactopus Course-to-Pack Ingest ==") + print("== Didactopus Multi-Source Course Ingest ==") print(f"Course: {course.title}") + print(f"Sources: {len(records)}") print(f"Modules: {len(course.modules)}") print(f"Concept candidates: {len(context.concepts)}") print(f"Review flags: {len(context.review_flags)}") + print(f"Conflicts: {len(conflicts)}") print(f"Output dir: {args.output_dir}") diff --git a/src/didactopus/pack_emitter.py b/src/didactopus/pack_emitter.py index 8b4d403..87899e7 100644 --- a/src/didactopus/pack_emitter.py +++ b/src/didactopus/pack_emitter.py @@ -6,7 +6,7 @@ import yaml from .course_schema import NormalizedCourse, ConceptCandidate, DraftPack -def build_draft_pack(course: NormalizedCourse, concepts: list[ConceptCandidate], author: str, license_name: str, review_flags: list[str]) -> DraftPack: +def build_draft_pack(course: NormalizedCourse, concepts: list[ConceptCandidate], author: str, license_name: str, review_flags: list[str], conflicts: list[str]) -> DraftPack: pack_name = course.title.lower().replace(" ", "-") pack = { "name": pack_name, @@ -15,7 +15,7 @@ def build_draft_pack(course: NormalizedCourse, concepts: list[ConceptCandidate], "schema_version": "1", "didactopus_min_version": "0.1.0", "didactopus_max_version": "0.9.99", - "description": f"Draft pack generated from course source '{course.source_name or course.title}'.", + "description": f"Draft pack generated from multi-source course inputs for '{course.title}'.", "author": author, "license": license_name, "dependencies": [], @@ -61,8 +61,23 @@ def build_draft_pack(course: NormalizedCourse, concepts: list[ConceptCandidate], }) projects = {"projects": project_items} rubrics = {"rubrics": [{"id": "draft-rubric", "title": "Draft Rubric", "criteria": ["correctness", "explanation"]}]} - attribution = {"source_name": course.source_name, "source_url": course.source_url, "rights_note": course.rights_note} - return DraftPack(pack=pack, concepts=concepts_yaml, roadmap=roadmap, projects=projects, 
rubrics=rubrics, review_report=review_flags, attribution=attribution) + attribution = { + "rights_note": course.rights_note, + "sources": [ + {"source_name": src.source_name, "source_type": src.source_type, "source_path": src.source_path} + for src in course.source_records + ], + } + return DraftPack( + pack=pack, + concepts=concepts_yaml, + roadmap=roadmap, + projects=projects, + rubrics=rubrics, + review_report=review_flags, + attribution=attribution, + conflicts=conflicts, + ) def write_draft_pack(pack: DraftPack, outdir: str | Path) -> None: @@ -73,6 +88,11 @@ def write_draft_pack(pack: DraftPack, outdir: str | Path) -> None: (out / "roadmap.yaml").write_text(yaml.safe_dump(pack.roadmap, sort_keys=False), encoding="utf-8") (out / "projects.yaml").write_text(yaml.safe_dump(pack.projects, sort_keys=False), encoding="utf-8") (out / "rubrics.yaml").write_text(yaml.safe_dump(pack.rubrics, sort_keys=False), encoding="utf-8") + review_lines = ["# Review Report", ""] + [f"- {flag}" for flag in pack.review_report] if pack.review_report else ["# Review Report", "", "- none"] (out / "review_report.md").write_text("\n".join(review_lines), encoding="utf-8") + + conflict_lines = ["# Conflict Report", ""] + [f"- {flag}" for flag in pack.conflicts] if pack.conflicts else ["# Conflict Report", "", "- none"] + (out / "conflict_report.md").write_text("\n".join(conflict_lines), encoding="utf-8") + (out / "license_attribution.json").write_text(json.dumps(pack.attribution, indent=2), encoding="utf-8") diff --git a/tests/test_conflict_report.py b/tests/test_conflict_report.py new file mode 100644 index 0000000..e32d00e --- /dev/null +++ b/tests/test_conflict_report.py @@ -0,0 +1,15 @@ +from pathlib import Path +from didactopus.course_ingest import parse_source_file, merge_source_records, extract_concept_candidates +from didactopus.conflict_report import detect_duplicate_lessons, detect_term_conflicts, detect_thin_concepts + + +def test_conflict_detection(tmp_path: Path) -> None: + a 
= tmp_path / "a.md" + b = tmp_path / "b.md" + a.write_text("# C\n\n## M1\n### Bayesian Updating\nPrior and Posterior are discussed here.", encoding="utf-8") + b.write_text("# C\n\n## M2\n### Bayesian Updating\nPrior and Posterior appear again.", encoding="utf-8") + course = merge_source_records([parse_source_file(a, title="Course"), parse_source_file(b, title="Course")], course_title="Course", merge_same_named_lessons=False) + concepts = extract_concept_candidates(course) + assert isinstance(detect_duplicate_lessons(course), list) + assert isinstance(detect_term_conflicts(course), list) + assert isinstance(detect_thin_concepts(concepts), list) diff --git a/tests/test_multisource_ingest.py b/tests/test_multisource_ingest.py new file mode 100644 index 0000000..f9686f2 --- /dev/null +++ b/tests/test_multisource_ingest.py @@ -0,0 +1,23 @@ +from pathlib import Path +from didactopus.course_ingest import parse_source_file, merge_source_records, extract_concept_candidates + + +def test_merge_source_records(tmp_path: Path) -> None: + a = tmp_path / "a.md" + b = tmp_path / "b.transcript.txt" + a.write_text("# C\n\n## M1\n### L1\n- Objective: Explain A.\nText A.", encoding="utf-8") + b.write_text("# C\n\n## M1\n### L1\nExtra transcript detail.", encoding="utf-8") + + records = [parse_source_file(a, title="Course"), parse_source_file(b, title="Course")] + course = merge_source_records(records, course_title="Course") + assert len(course.modules) == 1 + assert len(course.modules[0].lessons) == 1 + assert len(course.modules[0].lessons[0].source_refs) >= 1 + + +def test_extract_candidates_from_merged(tmp_path: Path) -> None: + a = tmp_path / "a.md" + a.write_text("# C\n\n## M1\n### Lesson A\n- Objective: Explain Topic A.\nBody.", encoding="utf-8") + course = merge_source_records([parse_source_file(a, title="Course")], course_title="Course") + concepts = extract_concept_candidates(course) + assert len(concepts) >= 1 diff --git a/tests/test_pack_output.py b/tests/test_pack_output.py 
# tests/test_pack_output.py
import json
from pathlib import Path

from didactopus.course_ingest import parse_source_file, merge_source_records, extract_concept_candidates
from didactopus.rule_policy import RuleContext, build_default_rules, run_rules
from didactopus.pack_emitter import build_draft_pack, write_draft_pack


def test_emit_multisource_pack(tmp_path: Path) -> None:
    """Emit a pack from one markdown source and check the written artifacts."""
    src = tmp_path / "course.md"
    src.write_text(
        "# C\n\n## M1\n### Lesson A\n- Objective: Explain Topic A.\n- Exercise: Do task A.\nTopic A body.",
        encoding="utf-8",
    )
    course = merge_source_records([parse_source_file(src, title="Course")], course_title="Course")
    concepts = extract_concept_candidates(course)
    ctx = RuleContext(course=course, concepts=concepts)
    run_rules(ctx, build_default_rules())
    draft = build_draft_pack(course, ctx.concepts, "Tester", "REVIEW", ctx.review_flags, [])

    out = tmp_path / "out"
    write_draft_pack(draft, out)

    assert (out / "pack.yaml").exists()

    # With no conflicts passed in, the report must carry the explicit "none" marker.
    report = (out / "conflict_report.md").read_text(encoding="utf-8")
    assert report.splitlines() == ["# Conflict Report", "", "- none"]

    # The attribution manifest lists every input source with its detected type.
    attribution = json.loads((out / "license_attribution.json").read_text(encoding="utf-8"))
    assert attribution["sources"] == [
        {"source_name": "course.md", "source_type": "markdown", "source_path": str(src)}
    ]