From 8d2b6928a813ba0aabbb547b6a9db48d57bd1557 Mon Sep 17 00:00:00 2001
From: welsberr
Date: Wed, 22 Apr 2026 21:37:09 -0400
Subject: [PATCH] Add doclift bundle pack demo

---
 src/didactopus/doclift_bundle_demo.py | 110 ++++++++++++++++++++++++++++++++++++++++++++
 tests/test_doclift_bundle_demo.py     |  50 ++++++++++++++++++++
 2 files changed, 160 insertions(+)
 create mode 100755 src/didactopus/doclift_bundle_demo.py
 create mode 100755 tests/test_doclift_bundle_demo.py

diff --git a/src/didactopus/doclift_bundle_demo.py b/src/didactopus/doclift_bundle_demo.py
new file mode 100755
index 0000000..5357400
--- /dev/null
+++ b/src/didactopus/doclift_bundle_demo.py
@@ -0,0 +1,110 @@
+from __future__ import annotations
+
+import argparse
+import json
+from pathlib import Path
+
+from .artifact_registry import validate_pack
+from .document_adapters import adapt_documents
+from .knowledge_graph import write_knowledge_graph
+from .pack_emitter import build_draft_pack, write_draft_pack, write_source_corpus
+from .rule_policy import RuleContext, build_default_rules, run_rules
+from .topic_ingest import build_topic_bundle, document_to_course, extract_concept_candidates, merge_courses_into_topic_course
+
+
+def run_doclift_bundle_demo(
+    bundle_dir: str | Path,
+    course_title: str,
+    pack_dir: str | Path,
+    author: str = "doclift bundle import",
+    license_name: str = "See source bundle metadata",
+) -> dict:
+    """Build, write and validate a draft pack from a doclift bundle.
+
+    Args:
+        bundle_dir: Directory handed to ``adapt_documents``.
+        course_title: Title passed to ``document_to_course`` for every
+            document and to ``build_topic_bundle`` for the merged course.
+        pack_dir: Directory passed to the pack writers; the summary file is
+            written into it as well.
+        author: Author passed through to ``build_draft_pack``.
+        license_name: License string passed through to ``build_draft_pack``.
+
+    Returns:
+        The summary dict that is also written to
+        ``doclift_bundle_summary.json`` inside ``pack_dir``.
+
+    Raises:
+        ValueError: If ``adapt_documents`` returns no documents, or if
+            ``validate_pack`` reports the generated pack as invalid.
+    """
+    bundle_dir = Path(bundle_dir)
+    pack_dir = Path(pack_dir)
+
+    docs = adapt_documents(bundle_dir)
+    if not docs:
+        raise ValueError(f"No documents found in doclift bundle {bundle_dir}")
+
+    courses = [document_to_course(doc, course_title) for doc in docs]
+    merged = merge_courses_into_topic_course(build_topic_bundle(course_title, courses))
+    concepts = extract_concept_candidates(merged)
+
+    # Keep only concepts sharing an id with a concept whose title is a lesson
+    # title. The title set is built once, not once per candidate concept.
+    lesson_titles = {lesson.title for module in merged.modules for lesson in module.lessons}
+    lesson_concept_ids = {concept.id for concept in concepts if concept.title in lesson_titles}
+    concepts = [concept for concept in concepts if concept.id in lesson_concept_ids]
+
+    ctx = RuleContext(course=merged, concepts=concepts)
+    run_rules(ctx, build_default_rules(enable_projects=False, enable_review=False))
+
+    draft = build_draft_pack(
+        merged,
+        ctx.concepts,
+        author=author,
+        license_name=license_name,
+        review_flags=ctx.review_flags,
+        conflicts=[],
+    )
+    write_draft_pack(draft, pack_dir)
+    write_source_corpus(merged, pack_dir)
+    write_knowledge_graph(merged, ctx.concepts, pack_dir)
+
+    validation = validate_pack(pack_dir)
+    if not validation.is_valid:
+        raise ValueError(f"Generated pack failed validation: {validation.errors}")
+
+    summary = {
+        "bundle_dir": str(bundle_dir),
+        "course_title": course_title,
+        "pack_dir": str(pack_dir),
+        "source_document_count": len(docs),
+        "module_count": len(merged.modules),
+        "concept_count": len(ctx.concepts),
+        "review_flags": list(ctx.review_flags),
+    }
+    (pack_dir / "doclift_bundle_summary.json").write_text(json.dumps(summary, indent=2), encoding="utf-8")
+    return summary
+
+
+def main() -> None:
+    """Parse command-line arguments, run the demo and print the summary as JSON."""
+    parser = argparse.ArgumentParser(description="Generate a Didactopus draft pack from a doclift bundle.")
+    parser.add_argument("bundle_dir")
+    parser.add_argument("pack_dir")
+    parser.add_argument("--course-title", required=True)
+    parser.add_argument("--author", default="doclift bundle import")
+    parser.add_argument("--license-name", default="See source bundle metadata")
+    args = parser.parse_args()
+
+    summary = run_doclift_bundle_demo(
+        bundle_dir=args.bundle_dir,
+        course_title=args.course_title,
+        pack_dir=args.pack_dir,
+        author=args.author,
+        license_name=args.license_name,
+    )
+    print(json.dumps(summary, indent=2))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_doclift_bundle_demo.py b/tests/test_doclift_bundle_demo.py
new file mode 100755
index 0000000..c310394
--- /dev/null
+++ b/tests/test_doclift_bundle_demo.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from didactopus.doclift_bundle_demo import run_doclift_bundle_demo
+
+
+def test_doclift_bundle_demo_generates_pack(tmp_path: Path) -> None:
+    """A one-document bundle yields a pack with every expected artifact file."""
+    bundle = tmp_path / "bundle"
+    doc_dir = bundle / "documents" / "lesson-a"
+    pack_dir = tmp_path / "pack"
+    doc_dir.mkdir(parents=True)
+    (bundle / "manifest.json").write_text(
+        json.dumps(
+            {
+                "documents": [
+                    {
+                        "title": "Lecture 1. Example",
+                        "document_kind": "lecture",
+                        "output_dir": str(doc_dir),
+                        "layout_path": str(doc_dir / "document.layout.json"),
+                        "tables_path": str(doc_dir / "document.tables.json"),
+                        "figures_path": str(doc_dir / "document.figures.json"),
+                    }
+                ]
+            }
+        ),
+        encoding="utf-8",
+    )
+    (doc_dir / "document.md").write_text(
+        "# Lecture 1. Example\n\n## Module A\n### Lesson A\n- Objective: Explain lesson A.\nBody text.",
+        encoding="utf-8",
+    )
+    (doc_dir / "document.layout.json").write_text("[]", encoding="utf-8")
+    (doc_dir / "document.tables.json").write_text(json.dumps({"source_path": "/tmp/source.doc", "tables": []}), encoding="utf-8")
+    (doc_dir / "document.figures.json").write_text(json.dumps({"source_path": "/tmp/source.doc", "figure_references": []}), encoding="utf-8")
+
+    summary = run_doclift_bundle_demo(bundle, "Example Course", pack_dir)
+
+    assert summary["source_document_count"] == 1
+    assert (pack_dir / "pack.yaml").exists()
+    assert (pack_dir / "source_corpus.json").exists()
+    assert (pack_dir / "knowledge_graph.json").exists()
+
+    summary_path = pack_dir / "doclift_bundle_summary.json"
+    assert summary_path.exists()
+    written = json.loads(summary_path.read_text(encoding="utf-8"))
+    assert written["source_document_count"] == summary["source_document_count"]