Compare commits

...

2 Commits

4 changed files with 83 additions and 1 deletion

View File

@ -18,9 +18,11 @@ def run_doclift_bundle_demo(
pack_dir: str | Path, pack_dir: str | Path,
author: str = "doclift bundle import", author: str = "doclift bundle import",
license_name: str = "See source bundle metadata", license_name: str = "See source bundle metadata",
groundrecall_query_bundle_path: str | Path | None = None,
) -> dict: ) -> dict:
bundle_dir = Path(bundle_dir) bundle_dir = Path(bundle_dir)
pack_dir = Path(pack_dir) pack_dir = Path(pack_dir)
explicit_groundrecall_path = Path(groundrecall_query_bundle_path) if groundrecall_query_bundle_path else None
docs = adapt_documents(bundle_dir) docs = adapt_documents(bundle_dir)
if not docs: if not docs:
@ -33,6 +35,7 @@ def run_doclift_bundle_demo(
concepts = [concept for concept in concepts if concept.id in lesson_concept_ids] concepts = [concept for concept in concepts if concept.id in lesson_concept_ids]
ctx = RuleContext(course=merged, concepts=concepts) ctx = RuleContext(course=merged, concepts=concepts)
run_rules(ctx, build_default_rules(enable_projects=False, enable_review=False)) run_rules(ctx, build_default_rules(enable_projects=False, enable_review=False))
groundrecall_bundle = _load_groundrecall_query_bundle(bundle_dir, explicit_groundrecall_path)
draft = build_draft_pack( draft = build_draft_pack(
merged, merged,
@ -41,6 +44,7 @@ def run_doclift_bundle_demo(
license_name=license_name, license_name=license_name,
review_flags=ctx.review_flags, review_flags=ctx.review_flags,
conflicts=[], conflicts=[],
groundrecall_query_bundle=groundrecall_bundle,
) )
write_draft_pack(draft, pack_dir) write_draft_pack(draft, pack_dir)
write_source_corpus(merged, pack_dir) write_source_corpus(merged, pack_dir)
@ -58,11 +62,28 @@ def run_doclift_bundle_demo(
"module_count": len(merged.modules), "module_count": len(merged.modules),
"concept_count": len(ctx.concepts), "concept_count": len(ctx.concepts),
"review_flags": list(ctx.review_flags), "review_flags": list(ctx.review_flags),
"groundrecall_bundle_included": bool(groundrecall_bundle),
} }
(pack_dir / "doclift_bundle_summary.json").write_text(json.dumps(summary, indent=2), encoding="utf-8") (pack_dir / "doclift_bundle_summary.json").write_text(json.dumps(summary, indent=2), encoding="utf-8")
return summary return summary
def _load_groundrecall_query_bundle(bundle_dir: Path, explicit_path: Path | None) -> dict | None:
candidates = []
if explicit_path is not None:
candidates.append(explicit_path)
candidates.extend(
[
bundle_dir / "groundrecall_query_bundle.json",
bundle_dir / "manifest.groundrecall_query_bundle.json",
]
)
for path in candidates:
if path.exists():
return json.loads(path.read_text(encoding="utf-8"))
return None
def main() -> None: def main() -> None:
parser = argparse.ArgumentParser(description="Generate a Didactopus draft pack from a doclift bundle.") parser = argparse.ArgumentParser(description="Generate a Didactopus draft pack from a doclift bundle.")
parser.add_argument("bundle_dir") parser.add_argument("bundle_dir")
@ -70,6 +91,7 @@ def main() -> None:
parser.add_argument("--course-title", required=True) parser.add_argument("--course-title", required=True)
parser.add_argument("--author", default="doclift bundle import") parser.add_argument("--author", default="doclift bundle import")
parser.add_argument("--license-name", default="See source bundle metadata") parser.add_argument("--license-name", default="See source bundle metadata")
parser.add_argument("--groundrecall-query-bundle", default=None)
args = parser.parse_args() args = parser.parse_args()
summary = run_doclift_bundle_demo( summary = run_doclift_bundle_demo(
@ -78,6 +100,7 @@ def main() -> None:
pack_dir=args.pack_dir, pack_dir=args.pack_dir,
author=args.author, author=args.author,
license_name=args.license_name, license_name=args.license_name,
groundrecall_query_bundle_path=args.groundrecall_query_bundle,
) )
print(json.dumps(summary, indent=2)) print(json.dumps(summary, indent=2))

View File

@ -71,8 +71,12 @@ def build_draft_pack(
license_name: str, license_name: str,
review_flags: list[str], review_flags: list[str],
conflicts: list[str] | None = None, conflicts: list[str] | None = None,
groundrecall_query_bundle: dict | None = None,
) -> DraftPack: ) -> DraftPack:
pack_name = course.title.lower().replace(" ", "-") pack_name = course.title.lower().replace(" ", "-")
supporting_artifacts = ["source_corpus.json", "knowledge_graph.json"]
if groundrecall_query_bundle is not None:
supporting_artifacts.append("groundrecall_query_bundle.json")
pack = { pack = {
"name": pack_name, "name": pack_name,
"display_name": course.title, "display_name": course.title,
@ -87,7 +91,7 @@ def build_draft_pack(
"overrides": [], "overrides": [],
"profile_templates": {}, "profile_templates": {},
"cross_pack_links": [], "cross_pack_links": [],
"supporting_artifacts": ["source_corpus.json", "knowledge_graph.json"], "supporting_artifacts": supporting_artifacts,
} }
concepts_yaml = { concepts_yaml = {
"concepts": [ "concepts": [
@ -134,6 +138,8 @@ def build_draft_pack(
for src in course.source_records for src in course.source_records
], ],
} }
if groundrecall_query_bundle is not None:
attribution["groundrecall_query_bundle"] = groundrecall_query_bundle
return DraftPack( return DraftPack(
pack=pack, pack=pack,
concepts=concepts_yaml, concepts=concepts_yaml,
@ -159,6 +165,11 @@ def write_draft_pack(pack: DraftPack, outdir: str | Path) -> None:
conflict_lines = ["# Conflict Report", ""] + [f"- {flag}" for flag in pack.conflicts] if pack.conflicts else ["# Conflict Report", "", "- none"] conflict_lines = ["# Conflict Report", ""] + [f"- {flag}" for flag in pack.conflicts] if pack.conflicts else ["# Conflict Report", "", "- none"]
(out / "conflict_report.md").write_text("\n".join(conflict_lines), encoding="utf-8") (out / "conflict_report.md").write_text("\n".join(conflict_lines), encoding="utf-8")
(out / "license_attribution.json").write_text(json.dumps(pack.attribution, indent=2), encoding="utf-8") (out / "license_attribution.json").write_text(json.dumps(pack.attribution, indent=2), encoding="utf-8")
if isinstance(pack.attribution.get("groundrecall_query_bundle"), dict):
(out / "groundrecall_query_bundle.json").write_text(
json.dumps(pack.attribution["groundrecall_query_bundle"], indent=2),
encoding="utf-8",
)
def write_source_corpus(course: NormalizedCourse, outdir: str | Path) -> None: def write_source_corpus(course: NormalizedCourse, outdir: str | Path) -> None:

View File

@ -1,6 +1,8 @@
from __future__ import annotations from __future__ import annotations
import json
from pathlib import Path from pathlib import Path
import shutil
from didactopus.doclift_bundle_demo import run_doclift_bundle_demo from didactopus.doclift_bundle_demo import run_doclift_bundle_demo
@ -17,3 +19,23 @@ def test_doclift_bundle_demo_generates_pack(tmp_path: Path) -> None:
assert (tmp_path / "pack" / "source_corpus.json").exists() assert (tmp_path / "pack" / "source_corpus.json").exists()
assert (tmp_path / "pack" / "knowledge_graph.json").exists() assert (tmp_path / "pack" / "knowledge_graph.json").exists()
assert (tmp_path / "pack" / "doclift_bundle_summary.json").exists() assert (tmp_path / "pack" / "doclift_bundle_summary.json").exists()
def test_doclift_bundle_demo_carries_groundrecall_query_bundle(tmp_path: Path) -> None:
    """A query bundle placed in the bundle directory is reported and copied into the pack."""
    query_bundle = {
        "bundle_kind": "groundrecall_query_bundle",
        "concept": {"concept_id": "concept::lecture-1", "title": "Lecture 1"},
        "review_candidates": [
            {
                "candidate_id": "concept::lecture-1",
                "rationale": "Lecture 1 | lane=knowledge_capture | priority=20",
            }
        ],
    }

    # Work on a copy of the fixture so the extra file never touches the original.
    bundle_dir = tmp_path / "bundle"
    shutil.copytree(_fixture_bundle(), bundle_dir)
    bundle_file = bundle_dir / "groundrecall_query_bundle.json"
    bundle_file.write_text(json.dumps(query_bundle), encoding="utf-8")

    pack_dir = tmp_path / "pack"
    summary = run_doclift_bundle_demo(bundle_dir, "Example Course", pack_dir)

    assert summary["groundrecall_bundle_included"] is True
    assert (pack_dir / "groundrecall_query_bundle.json").exists()

View File

@ -27,3 +27,29 @@ def test_emit_pack(tmp_path: Path) -> None:
assert (tmp_path / "review_report.md").exists() assert (tmp_path / "review_report.md").exists()
assert (tmp_path / "source_corpus.json").exists() assert (tmp_path / "source_corpus.json").exists()
assert (tmp_path / "knowledge_graph.json").exists() assert (tmp_path / "knowledge_graph.json").exists()
def test_emit_pack_can_write_groundrecall_query_bundle(tmp_path: Path) -> None:
    """A bundle handed to build_draft_pack is named in pack.yaml and written as its own file."""
    parsed_course = parse_markdown_course(SAMPLE, "Sample Course")
    rule_ctx = RuleContext(
        course=parsed_course,
        concepts=extract_concept_candidates(parsed_course),
    )
    run_rules(rule_ctx, build_default_rules())

    query_bundle = {
        "bundle_kind": "groundrecall_query_bundle",
        "concept": {"concept_id": "concept::topic-a", "title": "Topic A"},
        "review_candidates": [
            {
                "candidate_id": "concept::topic-a",
                "rationale": "Topic A | lane=knowledge_capture | priority=20",
            }
        ],
    }
    draft_pack = build_draft_pack(
        parsed_course,
        rule_ctx.concepts,
        "Tester",
        "REVIEW",
        rule_ctx.review_flags,
        groundrecall_query_bundle=query_bundle,
    )
    write_draft_pack(draft_pack, tmp_path)

    manifest_text = (tmp_path / "pack.yaml").read_text(encoding="utf-8")
    written_bundle = (tmp_path / "groundrecall_query_bundle.json").read_text(encoding="utf-8")

    assert "groundrecall_query_bundle.json" in manifest_text
    assert '"bundle_kind": "groundrecall_query_bundle"' in written_bundle