Add augmentation bundle support for OCW demos
This commit is contained in:
parent
e3a04b6742
commit
5a25e41043
|
|
@ -0,0 +1,40 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
import json
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
|
def load_augmentation_bundle(bundle_dir: str | Path) -> dict[str, str]:
|
||||||
|
base = Path(bundle_dir)
|
||||||
|
manifest_path = None
|
||||||
|
for candidate in ("bundle.yaml", "bundle.yml", "bundle.json"):
|
||||||
|
path = base / candidate
|
||||||
|
if path.exists():
|
||||||
|
manifest_path = path
|
||||||
|
break
|
||||||
|
|
||||||
|
payload: dict = {}
|
||||||
|
if manifest_path is not None:
|
||||||
|
raw = manifest_path.read_text(encoding="utf-8")
|
||||||
|
if manifest_path.suffix.lower() == ".json":
|
||||||
|
payload = json.loads(raw) if raw.strip() else {}
|
||||||
|
else:
|
||||||
|
payload = yaml.safe_load(raw) or {}
|
||||||
|
|
||||||
|
snippets_dir = payload.get("snippets_dir", "snippets")
|
||||||
|
source_inventory = payload.get("source_inventory", "wolfe-sources.yaml")
|
||||||
|
concept_alignment = payload.get("concept_alignment", "snippets/concept-alignment.yaml")
|
||||||
|
|
||||||
|
resolved_snippets = (base / snippets_dir).resolve()
|
||||||
|
resolved_inventory = (base / source_inventory).resolve()
|
||||||
|
resolved_alignment = (base / concept_alignment).resolve()
|
||||||
|
|
||||||
|
return {
|
||||||
|
"bundle_dir": str(base.resolve()),
|
||||||
|
"snippets_dir": str(resolved_snippets),
|
||||||
|
"source_inventory": str(resolved_inventory),
|
||||||
|
"concept_alignment": str(resolved_alignment),
|
||||||
|
"title": str(payload.get("title", base.name)),
|
||||||
|
"description": str(payload.get("description", "")),
|
||||||
|
}
|
||||||
|
|
@ -6,6 +6,7 @@ import sys
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .agentic_loop import AgenticStudentState, integrate_attempt
|
from .agentic_loop import AgenticStudentState, integrate_attempt
|
||||||
|
from .augmentation_bundle import load_augmentation_bundle
|
||||||
from .artifact_registry import validate_pack
|
from .artifact_registry import validate_pack
|
||||||
from .course_ingestion_compliance import build_pack_compliance_manifest, load_sources, write_manifest
|
from .course_ingestion_compliance import build_pack_compliance_manifest, load_sources, write_manifest
|
||||||
from .course_repo import bootstrap_course_repo, resolve_course_repo
|
from .course_repo import bootstrap_course_repo, resolve_course_repo
|
||||||
|
|
@ -227,6 +228,24 @@ def _load_wolfe_concept_alignment(wolfe_snippets_dir: Path | None) -> dict[str,
|
||||||
return mapping
|
return mapping
|
||||||
|
|
||||||
|
|
||||||
|
def _load_wolfe_concept_alignment_path(path: Path | None) -> dict[str, str]:
|
||||||
|
if path is None or not path.exists():
|
||||||
|
return {}
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
payload = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
|
||||||
|
alignments = payload.get("alignments", []) or []
|
||||||
|
mapping: dict[str, str] = {}
|
||||||
|
for item in alignments:
|
||||||
|
if not isinstance(item, dict):
|
||||||
|
continue
|
||||||
|
source_title = str(item.get("source_title", "")).strip()
|
||||||
|
target_title = str(item.get("target_title", "")).strip()
|
||||||
|
if source_title and target_title:
|
||||||
|
mapping[source_title] = target_title
|
||||||
|
return mapping
|
||||||
|
|
||||||
|
|
||||||
def _apply_concept_alignment(concepts: list, alignment: dict[str, str]) -> list:
|
def _apply_concept_alignment(concepts: list, alignment: dict[str, str]) -> list:
|
||||||
if not alignment:
|
if not alignment:
|
||||||
return concepts
|
return concepts
|
||||||
|
|
@ -317,12 +336,22 @@ def run_ocw_information_entropy_demo(
|
||||||
skill_dir: str | Path,
|
skill_dir: str | Path,
|
||||||
wolfe_snippets_dir: str | Path | None = None,
|
wolfe_snippets_dir: str | Path | None = None,
|
||||||
wolfe_source_inventory: str | Path | None = None,
|
wolfe_source_inventory: str | Path | None = None,
|
||||||
|
augmentation_bundle: str | Path | None = None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
course_source = Path(course_source)
|
course_source = Path(course_source)
|
||||||
source_inventory = Path(source_inventory)
|
source_inventory = Path(source_inventory)
|
||||||
pack_dir = Path(pack_dir)
|
pack_dir = Path(pack_dir)
|
||||||
run_dir = Path(run_dir)
|
run_dir = Path(run_dir)
|
||||||
skill_dir = Path(skill_dir)
|
skill_dir = Path(skill_dir)
|
||||||
|
augmentation_bundle = Path(augmentation_bundle) if augmentation_bundle is not None else None
|
||||||
|
augmentation_alignment_path: Path | None = None
|
||||||
|
augmentation_bundle_title = ""
|
||||||
|
if augmentation_bundle is not None:
|
||||||
|
bundle_payload = load_augmentation_bundle(augmentation_bundle)
|
||||||
|
wolfe_snippets_dir = Path(bundle_payload["snippets_dir"])
|
||||||
|
wolfe_source_inventory = Path(bundle_payload["source_inventory"])
|
||||||
|
augmentation_alignment_path = Path(bundle_payload["concept_alignment"])
|
||||||
|
augmentation_bundle_title = bundle_payload.get("title", "")
|
||||||
wolfe_snippets_dir = Path(wolfe_snippets_dir) if wolfe_snippets_dir is not None else None
|
wolfe_snippets_dir = Path(wolfe_snippets_dir) if wolfe_snippets_dir is not None else None
|
||||||
wolfe_source_inventory = Path(wolfe_source_inventory) if wolfe_source_inventory is not None else None
|
wolfe_source_inventory = Path(wolfe_source_inventory) if wolfe_source_inventory is not None else None
|
||||||
|
|
||||||
|
|
@ -349,7 +378,8 @@ def run_ocw_information_entropy_demo(
|
||||||
merged.rights_note = DEFAULT_RIGHTS_NOTE
|
merged.rights_note = DEFAULT_RIGHTS_NOTE
|
||||||
|
|
||||||
concepts = extract_concept_candidates(merged)
|
concepts = extract_concept_candidates(merged)
|
||||||
concepts = _apply_concept_alignment(concepts, _load_wolfe_concept_alignment(wolfe_snippets_dir))
|
alignment = _load_wolfe_concept_alignment_path(augmentation_alignment_path) if augmentation_alignment_path is not None else _load_wolfe_concept_alignment(wolfe_snippets_dir)
|
||||||
|
concepts = _apply_concept_alignment(concepts, alignment)
|
||||||
ctx = RuleContext(course=merged, concepts=concepts)
|
ctx = RuleContext(course=merged, concepts=concepts)
|
||||||
run_rules(ctx, build_default_rules())
|
run_rules(ctx, build_default_rules())
|
||||||
if review_flag:
|
if review_flag:
|
||||||
|
|
@ -453,6 +483,8 @@ def run_ocw_information_entropy_demo(
|
||||||
"effective_source_inventory": str(effective_inventory_path),
|
"effective_source_inventory": str(effective_inventory_path),
|
||||||
"wolfe_snippets_dir": str(wolfe_snippets_dir) if wolfe_snippets_dir is not None else "",
|
"wolfe_snippets_dir": str(wolfe_snippets_dir) if wolfe_snippets_dir is not None else "",
|
||||||
"wolfe_source_inventory": str(wolfe_source_inventory) if wolfe_source_inventory is not None else "",
|
"wolfe_source_inventory": str(wolfe_source_inventory) if wolfe_source_inventory is not None else "",
|
||||||
|
"augmentation_bundle": str(augmentation_bundle) if augmentation_bundle is not None else "",
|
||||||
|
"augmentation_bundle_title": augmentation_bundle_title,
|
||||||
"wolfe_source_document_count": wolfe_doc_count,
|
"wolfe_source_document_count": wolfe_doc_count,
|
||||||
"review_flags": list(ctx.review_flags),
|
"review_flags": list(ctx.review_flags),
|
||||||
"concept_count": len(ctx.concepts),
|
"concept_count": len(ctx.concepts),
|
||||||
|
|
@ -489,6 +521,7 @@ def main() -> None:
|
||||||
parser.add_argument("--skill-dir")
|
parser.add_argument("--skill-dir")
|
||||||
parser.add_argument("--wolfe-snippets-dir")
|
parser.add_argument("--wolfe-snippets-dir")
|
||||||
parser.add_argument("--wolfe-source-inventory")
|
parser.add_argument("--wolfe-source-inventory")
|
||||||
|
parser.add_argument("--augmentation-bundle")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.course_repo_target:
|
if args.course_repo_target:
|
||||||
|
|
@ -517,6 +550,7 @@ def main() -> None:
|
||||||
skill_dir=resolved["skill_dir"],
|
skill_dir=resolved["skill_dir"],
|
||||||
wolfe_snippets_dir=args.wolfe_snippets_dir,
|
wolfe_snippets_dir=args.wolfe_snippets_dir,
|
||||||
wolfe_source_inventory=args.wolfe_source_inventory,
|
wolfe_source_inventory=args.wolfe_source_inventory,
|
||||||
|
augmentation_bundle=args.augmentation_bundle,
|
||||||
)
|
)
|
||||||
print(json.dumps(summary, indent=2))
|
print(json.dumps(summary, indent=2))
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,37 @@
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from didactopus.augmentation_bundle import load_augmentation_bundle
|
||||||
|
|
||||||
|
|
||||||
|
def test_load_augmentation_bundle_defaults(tmp_path: Path) -> None:
    """Without a manifest, the loader reports the default bundle layout."""
    (tmp_path / "snippets").mkdir()
    (tmp_path / "snippets" / "concept-alignment.yaml").write_text("alignments: []\n", encoding="utf-8")
    (tmp_path / "wolfe-sources.yaml").write_text("sources: []\n", encoding="utf-8")

    payload = load_augmentation_bundle(tmp_path)

    # Compare as Path objects so the checks do not depend on the OS path separator.
    assert Path(payload["snippets_dir"]) == (tmp_path / "snippets").resolve()
    assert Path(payload["source_inventory"]) == (tmp_path / "wolfe-sources.yaml").resolve()
    assert Path(payload["concept_alignment"]) == (tmp_path / "snippets" / "concept-alignment.yaml").resolve()
|
||||||
|
|
||||||
|
|
||||||
|
def test_load_augmentation_bundle_manifest_overrides_paths(tmp_path: Path) -> None:
    """Paths and title declared in ``bundle.yaml`` replace the defaults."""
    (tmp_path / "extras").mkdir()
    (tmp_path / "extras" / "map.yaml").write_text("alignments: []\n", encoding="utf-8")
    (tmp_path / "inventory.yaml").write_text("sources: []\n", encoding="utf-8")
    (tmp_path / "bundle.yaml").write_text(
        "\n".join(
            [
                "title: Demo Bundle",
                "snippets_dir: extras",
                "source_inventory: inventory.yaml",
                "concept_alignment: extras/map.yaml",
            ]
        ),
        encoding="utf-8",
    )

    payload = load_augmentation_bundle(tmp_path)

    assert payload["title"] == "Demo Bundle"
    # Compare as Path objects so the checks do not depend on the OS path separator.
    assert Path(payload["snippets_dir"]) == (tmp_path / "extras").resolve()
    assert Path(payload["source_inventory"]) == (tmp_path / "inventory.yaml").resolve()
    assert Path(payload["concept_alignment"]) == (tmp_path / "extras" / "map.yaml").resolve()
|
||||||
|
|
@ -125,3 +125,76 @@ def test_ocw_demo_can_apply_wolfe_snippet_augmentation(tmp_path: Path) -> None:
|
||||||
assert "wolfe-local-snippet" in manifest["derived_from_sources"]
|
assert "wolfe-local-snippet" in manifest["derived_from_sources"]
|
||||||
assert bundle["bundle_kind"] == "groundrecall_query_bundle"
|
assert bundle["bundle_kind"] == "groundrecall_query_bundle"
|
||||||
assert "Entropy Comparison" not in concept_titles
|
assert "Entropy Comparison" not in concept_titles
|
||||||
|
|
||||||
|
|
||||||
|
def test_ocw_demo_can_load_augmentation_bundle(tmp_path: Path) -> None:
    """The OCW demo accepts a bundle directory and reports it in its summary."""
    # Minimal course with a single concept.
    source_dir = tmp_path / "course"
    source_dir.mkdir()
    (source_dir / "unit1.md").write_text(
        "# Course\n\n## Unit 1\n### Thermodynamics and Entropy\n- Objective: Explain entropy.\nEntropy links uncertainty to physics.",
        encoding="utf-8",
    )
    sources = tmp_path / "sources.yaml"
    sources.write_text("sources: []\n", encoding="utf-8")

    # Bundle layout: snippets/, sources.yaml, snippets/concept-alignment.yaml, bundle.yaml.
    bundle_dir = tmp_path / "bundle"
    snippets_dir = bundle_dir / "snippets"
    snippets_dir.mkdir(parents=True)
    (snippets_dir / "snippet.md").write_text(
        "# Wolfe Snippet\n\n## Augmentation\n### Entropy Comparison\n- Objective: Compare Shannon entropy with thermodynamic entropy.\nThe two notions differ in interpretation even when the mathematics overlaps.",
        encoding="utf-8",
    )
    # Keys of a list item must be indented under its "- " marker to form one mapping.
    (bundle_dir / "sources.yaml").write_text(
        "\n".join(
            [
                "sources:",
                "  - source_id: wolfe-local-snippet",
                "    title: Wolfe local snippet",
                "    url: file:///local/wolfe/snippet",
                "    publisher: Local Library",
                "    creator: Local Search",
                "    license_id: local-only",
                "    license_url: https://example.invalid/local-only",
                "    retrieved_at: '2026-05-08'",
                "    adapted: false",
                "    attribution_text: Local Wolfe-derived snippet for private evaluation.",
                "    excluded_from_upstream_license: true",
                "    exclusion_notes: Local-only experimental augmentation.",
            ]
        ),
        encoding="utf-8",
    )
    (snippets_dir / "concept-alignment.yaml").write_text(
        "\n".join(
            [
                "alignments:",
                "  - source_title: Entropy Comparison",
                "    target_title: Thermodynamics and Entropy",
            ]
        ),
        encoding="utf-8",
    )
    (bundle_dir / "bundle.yaml").write_text(
        "\n".join(
            [
                "title: OCW Wolfe Bundle",
                "snippets_dir: snippets",
                "source_inventory: sources.yaml",
                "concept_alignment: snippets/concept-alignment.yaml",
            ]
        ),
        encoding="utf-8",
    )

    summary = run_ocw_information_entropy_demo(
        course_source=source_dir,
        source_inventory=sources,
        pack_dir=tmp_path / "pack",
        run_dir=tmp_path / "run",
        skill_dir=tmp_path / "skill",
        augmentation_bundle=bundle_dir,
    )

    # Check the final path component instead of a "/"-suffixed string so the
    # assertion also holds where the path separator is not "/".
    assert Path(summary["augmentation_bundle"]).name == "bundle"
    assert summary["augmentation_bundle_title"] == "OCW Wolfe Bundle"
    assert summary["wolfe_source_document_count"] == 1
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue