diff --git a/src/didactopus/augmentation_bundle.py b/src/didactopus/augmentation_bundle.py new file mode 100644 index 0000000..8cbbdb9 --- /dev/null +++ b/src/didactopus/augmentation_bundle.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from pathlib import Path +import json +import yaml + + +def load_augmentation_bundle(bundle_dir: str | Path) -> dict[str, str]: + base = Path(bundle_dir) + manifest_path = None + for candidate in ("bundle.yaml", "bundle.yml", "bundle.json"): + path = base / candidate + if path.exists(): + manifest_path = path + break + + payload: dict = {} + if manifest_path is not None: + raw = manifest_path.read_text(encoding="utf-8") + if manifest_path.suffix.lower() == ".json": + payload = json.loads(raw) if raw.strip() else {} + else: + payload = yaml.safe_load(raw) or {} + + snippets_dir = payload.get("snippets_dir", "snippets") + source_inventory = payload.get("source_inventory", "wolfe-sources.yaml") + concept_alignment = payload.get("concept_alignment", "snippets/concept-alignment.yaml") + + resolved_snippets = (base / snippets_dir).resolve() + resolved_inventory = (base / source_inventory).resolve() + resolved_alignment = (base / concept_alignment).resolve() + + return { + "bundle_dir": str(base.resolve()), + "snippets_dir": str(resolved_snippets), + "source_inventory": str(resolved_inventory), + "concept_alignment": str(resolved_alignment), + "title": str(payload.get("title", base.name)), + "description": str(payload.get("description", "")), + } diff --git a/src/didactopus/ocw_information_entropy_demo.py b/src/didactopus/ocw_information_entropy_demo.py index 46d1078..c1c379a 100644 --- a/src/didactopus/ocw_information_entropy_demo.py +++ b/src/didactopus/ocw_information_entropy_demo.py @@ -6,6 +6,7 @@ import sys import re from .agentic_loop import AgenticStudentState, integrate_attempt +from .augmentation_bundle import load_augmentation_bundle from .artifact_registry import validate_pack from .course_ingestion_compliance 
import build_pack_compliance_manifest, load_sources, write_manifest from .course_repo import bootstrap_course_repo, resolve_course_repo @@ -227,6 +228,24 @@ def _load_wolfe_concept_alignment(wolfe_snippets_dir: Path | None) -> dict[str, return mapping +def _load_wolfe_concept_alignment_path(path: Path | None) -> dict[str, str]: + if path is None or not path.exists(): + return {} + import yaml + + payload = yaml.safe_load(path.read_text(encoding="utf-8")) or {} + alignments = payload.get("alignments", []) or [] + mapping: dict[str, str] = {} + for item in alignments: + if not isinstance(item, dict): + continue + source_title = str(item.get("source_title", "")).strip() + target_title = str(item.get("target_title", "")).strip() + if source_title and target_title: + mapping[source_title] = target_title + return mapping + + def _apply_concept_alignment(concepts: list, alignment: dict[str, str]) -> list: if not alignment: return concepts @@ -317,12 +336,22 @@ def run_ocw_information_entropy_demo( skill_dir: str | Path, wolfe_snippets_dir: str | Path | None = None, wolfe_source_inventory: str | Path | None = None, + augmentation_bundle: str | Path | None = None, ) -> dict: course_source = Path(course_source) source_inventory = Path(source_inventory) pack_dir = Path(pack_dir) run_dir = Path(run_dir) skill_dir = Path(skill_dir) + augmentation_bundle = Path(augmentation_bundle) if augmentation_bundle is not None else None + augmentation_alignment_path: Path | None = None + augmentation_bundle_title = "" + if augmentation_bundle is not None: + bundle_payload = load_augmentation_bundle(augmentation_bundle) + wolfe_snippets_dir = Path(bundle_payload["snippets_dir"]) + wolfe_source_inventory = Path(bundle_payload["source_inventory"]) + augmentation_alignment_path = Path(bundle_payload["concept_alignment"]) + augmentation_bundle_title = bundle_payload.get("title", "") wolfe_snippets_dir = Path(wolfe_snippets_dir) if wolfe_snippets_dir is not None else None wolfe_source_inventory 
= Path(wolfe_source_inventory) if wolfe_source_inventory is not None else None @@ -349,7 +378,8 @@ def run_ocw_information_entropy_demo( merged.rights_note = DEFAULT_RIGHTS_NOTE concepts = extract_concept_candidates(merged) - concepts = _apply_concept_alignment(concepts, _load_wolfe_concept_alignment(wolfe_snippets_dir)) + alignment = _load_wolfe_concept_alignment_path(augmentation_alignment_path) if augmentation_alignment_path is not None else _load_wolfe_concept_alignment(wolfe_snippets_dir) + concepts = _apply_concept_alignment(concepts, alignment) ctx = RuleContext(course=merged, concepts=concepts) run_rules(ctx, build_default_rules()) if review_flag: @@ -453,6 +483,8 @@ def run_ocw_information_entropy_demo( "effective_source_inventory": str(effective_inventory_path), "wolfe_snippets_dir": str(wolfe_snippets_dir) if wolfe_snippets_dir is not None else "", "wolfe_source_inventory": str(wolfe_source_inventory) if wolfe_source_inventory is not None else "", + "augmentation_bundle": str(augmentation_bundle) if augmentation_bundle is not None else "", + "augmentation_bundle_title": augmentation_bundle_title, "wolfe_source_document_count": wolfe_doc_count, "review_flags": list(ctx.review_flags), "concept_count": len(ctx.concepts), @@ -489,6 +521,7 @@ def main() -> None: parser.add_argument("--skill-dir") parser.add_argument("--wolfe-snippets-dir") parser.add_argument("--wolfe-source-inventory") + parser.add_argument("--augmentation-bundle") args = parser.parse_args() if args.course_repo_target: @@ -517,6 +550,7 @@ def main() -> None: skill_dir=resolved["skill_dir"], wolfe_snippets_dir=args.wolfe_snippets_dir, wolfe_source_inventory=args.wolfe_source_inventory, + augmentation_bundle=args.augmentation_bundle, ) print(json.dumps(summary, indent=2)) diff --git a/tests/test_augmentation_bundle.py b/tests/test_augmentation_bundle.py new file mode 100644 index 0000000..7d239d5 --- /dev/null +++ b/tests/test_augmentation_bundle.py @@ -0,0 +1,37 @@ +from pathlib import Path + 
def test_load_augmentation_bundle_defaults(tmp_path: Path) -> None:
    """A bundle without a manifest resolves to the default layout."""
    (tmp_path / "snippets").mkdir()
    (tmp_path / "snippets" / "concept-alignment.yaml").write_text("alignments: []\n", encoding="utf-8")
    (tmp_path / "wolfe-sources.yaml").write_text("sources: []\n", encoding="utf-8")

    payload = load_augmentation_bundle(tmp_path)

    # Compare Path objects instead of "/"-suffixed strings so the
    # assertions do not depend on the platform's path separator.
    root = tmp_path.resolve()
    assert Path(payload["snippets_dir"]) == root / "snippets"
    assert Path(payload["source_inventory"]) == root / "wolfe-sources.yaml"
    assert Path(payload["concept_alignment"]) == root / "snippets" / "concept-alignment.yaml"


def test_load_augmentation_bundle_manifest_overrides_paths(tmp_path: Path) -> None:
    """Paths and title declared in bundle.yaml replace the defaults."""
    (tmp_path / "extras").mkdir()
    (tmp_path / "extras" / "map.yaml").write_text("alignments: []\n", encoding="utf-8")
    (tmp_path / "inventory.yaml").write_text("sources: []\n", encoding="utf-8")
    (tmp_path / "bundle.yaml").write_text(
        "\n".join(
            [
                "title: Demo Bundle",
                "snippets_dir: extras",
                "source_inventory: inventory.yaml",
                "concept_alignment: extras/map.yaml",
            ]
        ),
        encoding="utf-8",
    )

    payload = load_augmentation_bundle(tmp_path)

    root = tmp_path.resolve()
    assert payload["title"] == "Demo Bundle"
    assert Path(payload["snippets_dir"]) == root / "extras"
    assert Path(payload["source_inventory"]) == root / "inventory.yaml"
    assert Path(payload["concept_alignment"]) == root / "extras" / "map.yaml"
tmp_path / "course" + source_dir.mkdir() + (source_dir / "unit1.md").write_text( + "# Course\n\n## Unit 1\n### Thermodynamics and Entropy\n- Objective: Explain entropy.\nEntropy links uncertainty to physics.", + encoding="utf-8", + ) + sources = tmp_path / "sources.yaml" + sources.write_text("sources: []\n", encoding="utf-8") + + bundle_dir = tmp_path / "bundle" + snippets_dir = bundle_dir / "snippets" + snippets_dir.mkdir(parents=True) + (snippets_dir / "snippet.md").write_text( + "# Wolfe Snippet\n\n## Augmentation\n### Entropy Comparison\n- Objective: Compare Shannon entropy with thermodynamic entropy.\nThe two notions differ in interpretation even when the mathematics overlaps.", + encoding="utf-8", + ) + (bundle_dir / "sources.yaml").write_text( + "\n".join( + [ + "sources:", + " - source_id: wolfe-local-snippet", + " title: Wolfe local snippet", + " url: file:///local/wolfe/snippet", + " publisher: Local Library", + " creator: Local Search", + " license_id: local-only", + " license_url: https://example.invalid/local-only", + " retrieved_at: '2026-05-08'", + " adapted: false", + " attribution_text: Local Wolfe-derived snippet for private evaluation.", + " excluded_from_upstream_license: true", + " exclusion_notes: Local-only experimental augmentation.", + ] + ), + encoding="utf-8", + ) + (snippets_dir / "concept-alignment.yaml").write_text( + "\n".join( + [ + "alignments:", + " - source_title: Entropy Comparison", + " target_title: Thermodynamics and Entropy", + ] + ), + encoding="utf-8", + ) + (bundle_dir / "bundle.yaml").write_text( + "\n".join( + [ + "title: OCW Wolfe Bundle", + "snippets_dir: snippets", + "source_inventory: sources.yaml", + "concept_alignment: snippets/concept-alignment.yaml", + ] + ), + encoding="utf-8", + ) + + summary = run_ocw_information_entropy_demo( + course_source=source_dir, + source_inventory=sources, + pack_dir=tmp_path / "pack", + run_dir=tmp_path / "run", + skill_dir=tmp_path / "skill", + augmentation_bundle=bundle_dir, + ) + 
+ assert summary["augmentation_bundle"].endswith("/bundle") + assert summary["augmentation_bundle_title"] == "OCW Wolfe Bundle" + assert summary["wolfe_source_document_count"] == 1