From b7e2f9f54023e254facaa17270de640961c4efb7 Mon Sep 17 00:00:00 2001 From: welsberr Date: Thu, 23 Apr 2026 07:23:34 -0400 Subject: [PATCH] Add stable doclift bundle fixture --- README.md | 20 +++++++++++ .../documents/lecture-1/document.figures.json | 9 +++++ .../documents/lecture-1/document.layout.json | 8 +++++ .../documents/lecture-1/document.md | 9 +++++ .../documents/lecture-1/document.tables.json | 13 +++++++ .../doclift_bundle_minimal/manifest.json | 16 +++++++++ tests/test_doclift_bundle_demo.py | 35 +++---------------- 7 files changed, 80 insertions(+), 30 deletions(-) create mode 100755 tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.figures.json create mode 100755 tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.layout.json create mode 100755 tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.md create mode 100755 tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.tables.json create mode 100755 tests/fixtures/doclift_bundle_minimal/manifest.json diff --git a/README.md b/README.md index 6409172..b6110b3 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,26 @@ For the fastest included example, use the MIT OCW Information and Entropy demo. - progress visualization - skill export +## `doclift` Bundle Ingestion + +When your source material starts as legacy office documents, the intended +boundary is: + +1. `doclift` normalizes the source tree into a bundle. +2. `Didactopus` turns that bundle into a draft pack and learning path. +3. `GroundRecall` can import the same bundle directly when you need canonical + knowledge storage instead of a learner pack. 
+ +Example: + +```bash +doclift convert-dir /path/to/legacy-course /tmp/doclift-bundle --asset-root /path/to/legacy-course +didactopus doclift-bundle /tmp/doclift-bundle /tmp/didactopus-pack --course-title "Example Course" +``` + +The `didactopus doclift-bundle` command writes the normal draft-pack outputs plus a +`doclift_bundle_summary.json` file that records the bundle-to-pack conversion. + ## Didactopus As Pedagogy Support Didactopus is broader than a learner chat loop. diff --git a/tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.figures.json b/tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.figures.json new file mode 100755 index 0000000..c55670f --- /dev/null +++ b/tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.figures.json @@ -0,0 +1,9 @@ +{ + "source_path": "legacy/lecture-1.doc", + "figure_references": [ + { + "label": "Figure 1", + "caption": "Example figure caption" + } + ] +} diff --git a/tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.layout.json b/tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.layout.json new file mode 100755 index 0000000..e777268 --- /dev/null +++ b/tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.layout.json @@ -0,0 +1,8 @@ +[ + { + "line_index": 0, + "text": "Lecture 1. Example", + "kind": "heading", + "indent": 0 + } +] diff --git a/tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.md b/tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.md new file mode 100755 index 0000000..421282c --- /dev/null +++ b/tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.md @@ -0,0 +1,9 @@ +# Lecture 1. Example + +## Module A + +### Lesson A + +- Objective: Explain lesson A. + +Body text that grounds the example lesson. 
diff --git a/tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.tables.json b/tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.tables.json new file mode 100755 index 0000000..2fdf31d --- /dev/null +++ b/tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.tables.json @@ -0,0 +1,13 @@ +{ + "source_path": "legacy/lecture-1.doc", + "tables": [ + { + "table_id": "table-1", + "caption": "Example table", + "rows": [ + ["Column A", "Column B"], + ["1", "2"] + ] + } + ] +} diff --git a/tests/fixtures/doclift_bundle_minimal/manifest.json b/tests/fixtures/doclift_bundle_minimal/manifest.json new file mode 100755 index 0000000..c473239 --- /dev/null +++ b/tests/fixtures/doclift_bundle_minimal/manifest.json @@ -0,0 +1,16 @@ +{ + "documents": [ + { + "document_id": "lecture-1", + "title": "Lecture 1. Example", + "document_kind": "lecture", + "output_dir": "documents/lecture-1", + "markdown_path": "documents/lecture-1/document.md", + "layout_path": "documents/lecture-1/document.layout.json", + "tables_path": "documents/lecture-1/document.tables.json", + "figures_path": "documents/lecture-1/document.figures.json", + "table_count": 1, + "figure_reference_count": 1 + } + ] +} diff --git a/tests/test_doclift_bundle_demo.py b/tests/test_doclift_bundle_demo.py index c310394..76a3e84 100755 --- a/tests/test_doclift_bundle_demo.py +++ b/tests/test_doclift_bundle_demo.py @@ -1,41 +1,16 @@ from __future__ import annotations -import json from pathlib import Path from didactopus.doclift_bundle_demo import run_doclift_bundle_demo -def test_doclift_bundle_demo_generates_pack(tmp_path: Path) -> None: - bundle = tmp_path / "bundle" - doc_dir = bundle / "documents" / "lesson-a" - doc_dir.mkdir(parents=True) - (bundle / "manifest.json").write_text( - json.dumps( - { - "documents": [ - { - "title": "Lecture 1. 
Example", - "document_kind": "lecture", - "output_dir": str(doc_dir), - "layout_path": str(doc_dir / "document.layout.json"), - "tables_path": str(doc_dir / "document.tables.json"), - "figures_path": str(doc_dir / "document.figures.json"), - } - ] - } - ), - encoding="utf-8", - ) - (doc_dir / "document.md").write_text( - "# Lecture 1. Example\n\n## Module A\n### Lesson A\n- Objective: Explain lesson A.\nBody text.", - encoding="utf-8", - ) - (doc_dir / "document.layout.json").write_text("[]", encoding="utf-8") - (doc_dir / "document.tables.json").write_text(json.dumps({"source_path": "/tmp/source.doc", "tables": []}), encoding="utf-8") - (doc_dir / "document.figures.json").write_text(json.dumps({"source_path": "/tmp/source.doc", "figure_references": []}), encoding="utf-8") +def _fixture_bundle() -> Path: + return Path(__file__).parent / "fixtures" / "doclift_bundle_minimal" - summary = run_doclift_bundle_demo(bundle, "Example Course", tmp_path / "pack") + +def test_doclift_bundle_demo_generates_pack(tmp_path: Path) -> None: + summary = run_doclift_bundle_demo(_fixture_bundle(), "Example Course", tmp_path / "pack") assert summary["source_document_count"] == 1 assert (tmp_path / "pack" / "pack.yaml").exists()