From db2cca50d089dd8c1770e2bc503a19cfdb9f66a5 Mon Sep 17 00:00:00 2001 From: welsberr Date: Fri, 13 Mar 2026 05:49:26 -0400 Subject: [PATCH] Added mastery ledger and capability export. --- README.md | 59 +++++++- docs/faq.md | 76 ++++------- docs/mastery-ledger.md | 31 +++++ src/didactopus/agentic_loop.py | 192 ++++++++++++++++----------- src/didactopus/evaluator_pipeline.py | 77 +++++++---- src/didactopus/main.py | 89 +++++-------- src/didactopus/mastery_ledger.py | 78 +++++++++++ tests/test_mastery_ledger.py | 43 ++++++ 8 files changed, 436 insertions(+), 209 deletions(-) create mode 100644 docs/mastery-ledger.md create mode 100644 src/didactopus/mastery_ledger.py create mode 100644 tests/test_mastery_ledger.py diff --git a/README.md b/README.md index 5d4452e..5f56f4a 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,67 @@ ## Recent revisions +### Mastery Ledger + +This revision adds a **Mastery Ledger + Capability Export** layer. + +The main purpose is to let Didactopus turn accumulated learner state into +portable, inspectable artifacts that can support downstream deployment, +review, orchestration, or certification-like workflows. + +#### What is new + +- mastery ledger data model +- capability profile export +- JSON export of mastered concepts and evaluator summaries +- Markdown export of a readable capability report +- artifact manifest for produced deliverables +- demo CLI for generating exports for an AI student or human learner +- FAQ covering how learned mastery is represented and put to work + +#### Why this matters + +Didactopus can now do more than guide learning. It can also emit a structured +statement of what a learner appears able to do, based on explicit concepts, +evidence, and artifacts. + +That makes it easier to use Didactopus as: +- a mastery tracker +- a portfolio generator +- a deployment-readiness aid +- an orchestration input for agent routing + +#### Mastery representation + +A learner's mastery is represented as structured operational state, including: + +- mastered concepts +- evaluator results +- evidence summaries +- weak dimensions +- attempt history +- produced artifacts +- capability export + +This is stricter than a normal chat transcript or self-description. + +#### Future direction + +A later revision should connect the capability export with: +- formal evaluator outputs +- signed evidence ledgers +- domain-specific capability schemas +- deployment policies for agent routing + + +### Evaluator Pipeline + This revision introduces a **pluggable evaluator pipeline** that converts learner attempts into structured mastery evidence. -The prior revision adds an **agentic learner loop** that turns Didactopus into a closed-loop mastery system prototype. +### Agentic Learner Loop + +This revision adds an **agentic learner loop** that turns Didactopus into a closed-loop mastery system prototype. The loop can now: diff --git a/docs/faq.md b/docs/faq.md index 61974f3..f4c6e7d 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -1,65 +1,37 @@ # FAQ -## What is Didactopus? - -Didactopus is a mastery-oriented learning infrastructure that uses concept graphs, evidence-based assessment, and adaptive planning to support serious learning. - -## Is this just a tutoring chatbot? - -No. The intended architecture is broader than tutoring. Didactopus maintains explicit representations of: -- concepts -- prerequisites -- mastery criteria -- evidence -- learner state -- planning priorities - ## How is an AI student's learned mastery represented? 
-An AI student's learned mastery is represented as structured state, not just conversation history.
-
-Important elements include:
-- mastered concept set
+A learner's mastery is represented as structured operational state, including:
+- mastered concepts
+- evaluator summaries
+- weak dimensions
 - evidence records
-- dimension-level competence summaries
-- weak-dimension lists
-- project eligibility
-- target-progress state
-- produced artifacts and critiques
+- artifacts
+- capability export
 
-## Does Didactopus fine-tune the AI model?
+## Does Didactopus change the AI model weights?
 
-Not in the current design. Didactopus supervises and evaluates a learner agent, but it does not itself retrain foundation model weights.
+No. In the current architecture, Didactopus supervises and evaluates a learner
+agent, but it does not retrain the foundation model.
 
-## Then how is the AI student “ready to work”?
+## When is an AI student ready to be put to work?
 
-Readiness is operationalized by the mastery state. An AI student is ready for a class of tasks when:
-- relevant concepts are mastered
-- confidence is high enough
-- weak dimensions are acceptable for the target task
-- prerequisite and project evidence support deployment
+Readiness is represented operationally. A downstream system can inspect:
+- which concepts are mastered
+- which weak dimensions remain
+- what artifacts were produced
+- what evaluator evidence supports deployment
 
-## Could mastered state be exported?
+## Is the capability export a certification?
 
-Yes. A future implementation should support export of:
-- concept mastery ledgers
-- evidence portfolios
-- competence profiles
-- project artifacts
-- domain-specific capability summaries
+Not by itself. It is a structured mastery report. In a future revision, it could
+be combined with formal evaluators, signed evidence records, and policy rules.
 
-## Is human learning treated the same way?
+## Why is this useful?
 
-The same conceptual framework applies to both human and AI learners, though interfaces and evidence sources differ.
-
-## What is the difference between mastery and model knowledge?
-
-A model may contain latent knowledge or pattern familiarity. Didactopus mastery is narrower and stricter: it is evidence-backed demonstrated competence with respect to explicit concepts and criteria.
-
-## Why not use only embeddings and LLM judgments?
-
-Because correctness, especially in formal domains, often needs stronger guarantees than plausibility. That is why Didactopus may eventually need hybrid symbolic or executable validation components.
-
-## Can Didactopus work offline?
-
-Yes, that is a primary design goal. The architecture is local-first and can be paired with local model serving and locally stored domain packs.
+Because it lets Didactopus outputs feed directly into:
+- task routing
+- portfolio review
+- benchmark comparison
+- agent deployment policies
diff --git a/docs/mastery-ledger.md b/docs/mastery-ledger.md
new file mode 100644
index 0000000..6e7bc68
--- /dev/null
+++ b/docs/mastery-ledger.md
@@ -0,0 +1,31 @@
+# Mastery Ledger
+
+The mastery ledger is the structured record of what a learner has demonstrated.
+
+## Core contents
+
+- learner identity
+- target domain or goal
+- mastered concepts
+- concept-level evidence summaries
+- weak dimensions
+- artifact records
+- generated capability profile
+
+## Exports
+
+This scaffold exports:
+
+- JSON capability profile
+- Markdown capability report
+- artifact manifest JSON
+
+## Why it matters
+
+The mastery ledger provides an explicit representation of readiness.
+It supports both human and AI learners.
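+
+## Example export
+
+A minimal sketch of the JSON capability profile written by
+`export_capability_profile_json`. Field names follow the `CapabilityProfile`
+dataclass; the concrete values are illustrative demo output, truncated for
+brevity:
+
+```json
+{
+  "learner_id": "demo-agent",
+  "display_name": "Demo Agentic Student",
+  "domain": "Bayesian inference",
+  "mastered_concepts": [
+    "foundations-statistics::descriptive-statistics"
+  ],
+  "weak_dimensions_by_concept": {
+    "bayes-extension::prior": ["correctness"]
+  },
+  "evaluator_summary_by_concept": {
+    "bayes-extension::prior": {"correctness": 0.65, "explanation": 0.85}
+  },
+  "artifacts": [
+    {
+      "concept": "bayes-extension::prior",
+      "artifact_type": "explanation",
+      "artifact_name": "prior_reflection.md"
+    }
+  ]
+}
+```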
+
+## Important caveat
+
+The current scaffold is not a formal certification system. It is a structured
+capability report driven by the Didactopus evidence and evaluator pipeline.
diff --git a/src/didactopus/agentic_loop.py b/src/didactopus/agentic_loop.py
index 12a43ca..299f63f 100644
--- a/src/didactopus/agentic_loop.py
+++ b/src/didactopus/agentic_loop.py
@@ -1,92 +1,132 @@
-from __future__ import annotations
-
 from dataclasses import dataclass, field
-from .planner import rank_next_concepts, PlannerWeights
-from .evidence_engine import EvidenceState, ConceptEvidenceSummary
+from .evaluator_pipeline import (
+    LearnerAttempt,
+    RubricEvaluator,
+    CodeTestEvaluator,
+    SymbolicRuleEvaluator,
+    CritiqueEvaluator,
+    PortfolioEvaluator,
+    run_pipeline,
+    aggregate,
+)
+
+
+@dataclass
+class ConceptEvidenceSummary:
+    concept_key: str
+    weak_dimensions: list[str] = field(default_factory=list)
+    mastered: bool = False
+    aggregated: dict = field(default_factory=dict)
+    evaluators: list[str] = field(default_factory=list)
+
+
+@dataclass
+class EvidenceState:
+    summary_by_concept: dict[str, ConceptEvidenceSummary] = field(default_factory=dict)
+    resurfaced_concepts: set[str] = field(default_factory=set)
 
 
 @dataclass
 class AgenticStudentState:
+    learner_id: str = "demo-agent"
+    display_name: str = "Demo Agentic Student"
     mastered_concepts: set[str] = field(default_factory=set)
     evidence_state: EvidenceState = field(default_factory=EvidenceState)
     attempt_history: list[dict] = field(default_factory=list)
+    artifacts: list[dict] = field(default_factory=list)
 
 
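+# Demo fixture: each recognized concept maps to one canned LearnerAttempt so
+# the loop can run end-to-end without a live learner or model call.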
-def synthetic_attempt_for_concept(concept: str) -> dict:
+def synthetic_attempt_for_concept(concept: str) -> LearnerAttempt:
     if "descriptive-statistics" in concept:
-        weak = []
-        mastered = True
-    elif "probability-basics" in concept:
-        weak = ["transfer"]
-        mastered = False
-    elif "prior" in concept:
-        weak = ["explanation", "transfer"]
-        mastered = False
-    elif "posterior" in concept:
-        weak = ["critique", "transfer"]
-        mastered = False
-    elif "model-checking" in concept:
-        weak = ["critique"]
-        mastered = False
-    else:
-        weak = ["correctness"]
-        mastered = False
-
-    return {"concept": concept, "mastered": mastered, "weak_dimensions": weak}
-
-
-def integrate_attempt(state: AgenticStudentState, attempt: dict) -> None:
-    concept = attempt["concept"]
-    summary = ConceptEvidenceSummary(
-        concept_key=concept,
-        weak_dimensions=list(attempt["weak_dimensions"]),
-        mastered=bool(attempt["mastered"]),
-    )
-    state.evidence_state.summary_by_concept[concept] = summary
-    if summary.mastered:
-        state.mastered_concepts.add(concept)
-        state.evidence_state.resurfaced_concepts.discard(concept)
-    else:
-        if concept in state.mastered_concepts:
-            state.mastered_concepts.remove(concept)
-        state.evidence_state.resurfaced_concepts.add(concept)
-    state.attempt_history.append(attempt)
-
-
-def run_agentic_learning_loop(
-    graph,
-    project_catalog: list[dict],
-    target_concepts: list[str],
-    weights: PlannerWeights,
-    max_steps: int = 5,
-) -> AgenticStudentState:
-    state = AgenticStudentState()
-
-    for _ in range(max_steps):
-        weak_dimensions_by_concept = {
-            key: summary.weak_dimensions
-            for key, summary in state.evidence_state.summary_by_concept.items()
-        }
-        fragile = set(state.evidence_state.resurfaced_concepts)
-
-        ranked = rank_next_concepts(
-            graph=graph,
-            mastered=state.mastered_concepts,
-            targets=target_concepts,
-            weak_dimensions_by_concept=weak_dimensions_by_concept,
-            fragile_concepts=fragile,
-            project_catalog=project_catalog,
-            weights=weights,
+        return LearnerAttempt(
+            concept=concept,
+            artifact_type="explanation",
+            content="Mean and variance summarize a dataset because they describe center and spread.",
+            metadata={"deliverable_count": 1, "artifact_name": "descriptive_statistics_note.md"},
         )
-        if not ranked:
-            break
+    if "probability-basics" in concept:
+        return LearnerAttempt(
+            concept=concept,
+            artifact_type="explanation",
+            content="Conditional probability changes because context changes the sample space.",
+            metadata={"deliverable_count": 1, "artifact_name": "probability_basics_note.md"},
+        )
+    if "prior" in concept:
+        return LearnerAttempt(
+            concept=concept,
+            artifact_type="explanation",
+            content="A prior is an assumption before evidence, but one limitation is bias.",
+            metadata={"deliverable_count": 1, "artifact_name": "prior_reflection.md"},
+        )
+    if "posterior" in concept:
+        return LearnerAttempt(
+            concept=concept,
+            artifact_type="symbolic",
+            content="Therefore posterior = updated belief after evidence, but one assumption may be model fit.",
+            metadata={"deliverable_count": 1, "artifact_name": "posterior_symbolic_note.md"},
+        )
+    return LearnerAttempt(
+        concept=concept,
+        artifact_type="critique",
+        content="A weakness is hidden assumptions; a limitation is poor fit; uncertainty remains.",
+        metadata={"deliverable_count": 2, "artifact_name": "critique_report.md"},
+    )
 
-        chosen = ranked[0]["concept"]
-        attempt = synthetic_attempt_for_concept(chosen)
+
+def evaluator_set_for_attempt(attempt: LearnerAttempt) -> list:
+    # The generic rubric applies to every attempt; specialist evaluators are
+    # attached by artifact type so an attempt is scored only on dimensions it
+    # actually targets (otherwise explanation-only attempts could never clear
+    # the mastery threshold on the critique dimension).
+    evaluators = [RubricEvaluator()]
+    if attempt.artifact_type == "code":
+        evaluators.append(CodeTestEvaluator())
+    if attempt.artifact_type == "symbolic":
+        evaluators.append(SymbolicRuleEvaluator())
+    if attempt.artifact_type in {"project", "portfolio", "critique"}:
+        evaluators.append(CritiqueEvaluator())
+        evaluators.append(PortfolioEvaluator())
+    return evaluators
+
+
+def integrate_attempt(state: AgenticStudentState, attempt: LearnerAttempt) -> None:
+    results = run_pipeline(attempt, evaluator_set_for_attempt(attempt))
+    aggregated = aggregate(results)
+    # A concept counts as mastered only when every aggregated dimension clears
+    # the 0.75 threshold; any dimension below it is recorded as weak.
+    weak = [dim for dim, score in aggregated.items() if score < 0.75]
+    mastered = len(aggregated) > 0 and all(score >= 0.75 for score in aggregated.values())
+
+    summary = ConceptEvidenceSummary(
+        concept_key=attempt.concept,
+        weak_dimensions=weak,
+        mastered=mastered,
+        aggregated=aggregated,
+        evaluators=[r.evaluator_name for r in results],
+    )
+    state.evidence_state.summary_by_concept[attempt.concept] = summary
+
+    if mastered:
+        state.mastered_concepts.add(attempt.concept)
+        state.evidence_state.resurfaced_concepts.discard(attempt.concept)
+    else:
+        if attempt.concept in state.mastered_concepts:
+            state.mastered_concepts.remove(attempt.concept)
+        state.evidence_state.resurfaced_concepts.add(attempt.concept)
+
+    state.attempt_history.append({
+        "concept": attempt.concept,
+        "artifact_type": attempt.artifact_type,
+        "aggregated": aggregated,
+        "weak_dimensions": weak,
+        "mastered": mastered,
+        "evaluators": [r.evaluator_name for r in results],
+    })
+
+    state.artifacts.append({
+        "concept": attempt.concept,
+        "artifact_type": attempt.artifact_type,
+        "artifact_name": attempt.metadata.get("artifact_name", f"{attempt.concept}.txt"),
+    })
+
+
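+# Demo driver: in this revision the planner-driven run_agentic_learning_loop
+# is replaced by a fixed walk over an explicit concept list.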
+def run_demo_agentic_loop(concepts: list[str]) -> AgenticStudentState:
+    state = AgenticStudentState()
+    for concept in concepts:
+        attempt = synthetic_attempt_for_concept(concept)
         integrate_attempt(state, attempt)
-
-        if all(target in state.mastered_concepts for target in target_concepts):
-            break
-
     return state
diff --git a/src/didactopus/evaluator_pipeline.py b/src/didactopus/evaluator_pipeline.py
index b1d974a..d31a497 100644
--- a/src/didactopus/evaluator_pipeline.py
+++ b/src/didactopus/evaluator_pipeline.py
@@ -1,5 +1,6 @@
 from dataclasses import dataclass, field
 
+
 @dataclass
 class LearnerAttempt:
     concept: str
@@ -7,6 +8,7 @@ class LearnerAttempt:
     content: str
     metadata: dict = field(default_factory=dict)
 
+
 @dataclass
 class EvaluatorResult:
     evaluator_name: str
@@ -14,59 +16,84 @@ class EvaluatorResult:
     passed: bool | None = None
     notes: str = ""
 
+
 class RubricEvaluator:
     name = "rubric"
+
     def evaluate(self, attempt: LearnerAttempt):
-        explanation = 0.85 if len(attempt.content) > 40 else 0.55
-        correctness = 0.80 if "because" in attempt.content.lower() else 0.65
-        return EvaluatorResult(self.name,
-            {"correctness": correctness,
-             "explanation": explanation})
+        lowered = attempt.content.lower()
+        explanation = 0.85 if len(attempt.content.strip()) > 40 else 0.55
+        correctness = 0.80 if ("because" in lowered or "therefore" in lowered) else 0.65
+        return EvaluatorResult(
+            self.name,
+            {"correctness": correctness, "explanation": explanation},
+            notes="Heuristic scaffold rubric score.",
+        )
+
 
 class CodeTestEvaluator:
     name = "code_test"
+
     def evaluate(self, attempt: LearnerAttempt):
-        passed = "return" in attempt.content
+        passed = "return" in attempt.content or "assert" in attempt.content
         score = 0.9 if passed else 0.35
-        return EvaluatorResult(self.name,
-            {"correctness": score,
-             "project_execution": score},
-            passed=passed)
+        return EvaluatorResult(
+            self.name,
+            {"correctness": score, "project_execution": score},
+            passed=passed,
+            notes="Stub code/test evaluator.",
+        )
+
 
 class SymbolicRuleEvaluator:
     name = "symbolic_rule"
+
     def evaluate(self, attempt: LearnerAttempt):
-        passed = "=" in attempt.content
+        passed = "=" in attempt.content or "therefore" in attempt.content.lower()
        score = 0.88 if passed else 0.4
-        return EvaluatorResult(self.name,
-            {"correctness": score},
-            passed=passed)
+        return EvaluatorResult(
+            self.name,
+            {"correctness": score},
+            passed=passed,
+            notes="Stub symbolic evaluator.",
+        )
+
 
 class CritiqueEvaluator:
     name = "critique"
+
     def evaluate(self, attempt: LearnerAttempt):
-        markers = ["assumption","bias","limitation","weakness"]
+        markers = ["assumption", "bias", "limitation", "weakness", "uncertain"]
         found = sum(m in attempt.content.lower() for m in markers)
         score = min(1.0, 0.35 + 0.15 * found)
-        return EvaluatorResult(self.name, {"critique": score})
+        return EvaluatorResult(
+            self.name,
+            {"critique": score},
+            notes="Stub critique evaluator.",
+        )
+
 
 class PortfolioEvaluator:
     name = "portfolio"
+
     def evaluate(self, attempt: LearnerAttempt):
-        count = int(attempt.metadata.get("deliverable_count",1))
-        score = min(1.0, 0.5 + 0.1 * count)
-        return EvaluatorResult(self.name,
-            {"project_execution": score,
-             "transfer": max(0.4, score-0.1)})
+        deliverable_count = int(attempt.metadata.get("deliverable_count", 1))
+        score = min(1.0, 0.5 + 0.1 * deliverable_count)
+        return EvaluatorResult(
+            self.name,
+            {"project_execution": score, "transfer": max(0.4, score - 0.1)},
+            notes="Stub portfolio evaluator.",
+        )
+
 
 def run_pipeline(attempt, evaluators):
     return [e.evaluate(attempt) for e in evaluators]
 
+
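+# aggregate() folds a list of EvaluatorResult objects into one
+# dimension -> score map by averaging each dimension across all
+# evaluators that reported it.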
 def aggregate(results):
     totals = {}
     counts = {}
     for r in results:
-        for d,v in r.dimensions.items():
-            totals[d] = totals.get(d,0)+v
-            counts[d] = counts.get(d,0)+1
-    return {d: totals[d]/counts[d] for d in totals}
+        for dim, val in r.dimensions.items():
+            totals[dim] = totals.get(dim, 0.0) + val
+            counts[dim] = counts.get(dim, 0) + 1
+    return {dim: totals[dim] / counts[dim] for dim in totals}
diff --git a/src/didactopus/main.py b/src/didactopus/main.py
index 88aad98..93af9ca 100644
--- a/src/didactopus/main.py
+++ b/src/didactopus/main.py
@@ -1,70 +1,49 @@
 import argparse
-import os
 from pathlib import Path
 
-from .agentic_loop import run_agentic_learning_loop
-from .artifact_registry import check_pack_dependencies, detect_dependency_cycles, discover_domain_packs
-from .config import load_config
-from .graph_builder import build_concept_graph
-from .learning_graph import build_merged_learning_graph
-from .planner import PlannerWeights
+from .agentic_loop import run_demo_agentic_loop
+from .mastery_ledger import (
+    build_capability_profile,
+    export_capability_profile_json,
+    export_capability_report_markdown,
+    export_artifact_manifest,
+)
 
 
 def build_parser() -> argparse.ArgumentParser:
-    parser = argparse.ArgumentParser(description="Didactopus agentic learner loop")
-    parser.add_argument("--target", default="bayes-extension::posterior")
-    parser.add_argument("--steps", type=int, default=5)
-    parser.add_argument("--config", default=os.environ.get("DIDACTOPUS_CONFIG", "configs/config.example.yaml"))
+    parser = argparse.ArgumentParser(description="Didactopus mastery ledger demo")
+    parser.add_argument("--domain", default="Bayesian inference")
+    parser.add_argument("--outdir", default="exports")
     return parser
 
 
 def main() -> None:
     args = build_parser().parse_args()
-    config = load_config(Path(args.config))
-    results = discover_domain_packs(["domain-packs"])
-    dep_errors = check_pack_dependencies(results)
-    cycles = detect_dependency_cycles(results)
+    outdir = Path(args.outdir)
+    outdir.mkdir(parents=True, exist_ok=True)
 
-    if dep_errors:
-        print("Dependency errors:")
-        for err in dep_errors:
-            print(f"- {err}")
-    if cycles:
-        print("Dependency cycles:")
-        for cycle in cycles:
-            print(f"- {' -> '.join(cycle)}")
-        return
+    concepts = [
+        "foundations-statistics::descriptive-statistics",
+        "foundations-statistics::probability-basics",
+        "bayes-extension::prior",
+        "bayes-extension::posterior",
+        "applied-inference::model-checking",
+    ]
+    state = run_demo_agentic_loop(concepts)
+    profile = build_capability_profile(state, args.domain)
 
-    merged = build_merged_learning_graph(results, config.platform.default_dimension_thresholds)
-    graph = build_concept_graph(results, config.platform.default_dimension_thresholds)
+    json_path = outdir / "capability_profile.json"
+    md_path = outdir / "capability_report.md"
+    manifest_path = outdir / "artifact_manifest.json"
 
-    state = run_agentic_learning_loop(
-        graph=graph,
-        project_catalog=merged.project_catalog,
-        target_concepts=[args.target],
-        weights=PlannerWeights(
-            readiness_bonus=config.planner.readiness_bonus,
-            target_distance_weight=config.planner.target_distance_weight,
-            weak_dimension_bonus=config.planner.weak_dimension_bonus,
-            fragile_review_bonus=config.planner.fragile_review_bonus,
-            project_unlock_bonus=config.planner.project_unlock_bonus,
-            semantic_similarity_weight=config.planner.semantic_similarity_weight,
-        ),
-        max_steps=args.steps,
-    )
+    export_capability_profile_json(profile, str(json_path))
+    export_capability_report_markdown(profile, str(md_path))
+    export_artifact_manifest(profile, str(manifest_path))
 
-    print("== Didactopus Agentic Learner Loop ==")
-    print(f"Target: {args.target}")
-    print(f"Steps executed: {len(state.attempt_history)}")
-    print()
concepts:") - if state.mastered_concepts: - for item in sorted(state.mastered_concepts): - print(f"- {item}") - else: - print("- none") - print() - print("Attempt history:") - for item in state.attempt_history: - weak = ", ".join(item["weak_dimensions"]) if item["weak_dimensions"] else "none" - print(f"- {item['concept']}: mastered={item['mastered']}, weak={weak}") + print("== Didactopus Mastery Ledger Demo ==") + print(f"Domain: {args.domain}") + print(f"Mastered concepts: {len(profile.mastered_concepts)}") + print(f"Artifacts: {len(profile.artifacts)}") + print(f"Capability profile JSON: {json_path}") + print(f"Capability report Markdown: {md_path}") + print(f"Artifact manifest JSON: {manifest_path}") diff --git a/src/didactopus/mastery_ledger.py b/src/didactopus/mastery_ledger.py new file mode 100644 index 0000000..bb07233 --- /dev/null +++ b/src/didactopus/mastery_ledger.py @@ -0,0 +1,78 @@ +from dataclasses import dataclass, field, asdict +from pathlib import Path +import json + + +@dataclass +class CapabilityProfile: + learner_id: str + display_name: str + domain: str + mastered_concepts: list[str] = field(default_factory=list) + weak_dimensions_by_concept: dict[str, list[str]] = field(default_factory=dict) + evaluator_summary_by_concept: dict[str, dict] = field(default_factory=dict) + artifacts: list[dict] = field(default_factory=list) + + +def build_capability_profile(state, domain: str) -> CapabilityProfile: + weak = {} + summaries = {} + for concept, summary in state.evidence_state.summary_by_concept.items(): + weak[concept] = list(summary.weak_dimensions) + summaries[concept] = dict(summary.aggregated) + return CapabilityProfile( + learner_id=state.learner_id, + display_name=state.display_name, + domain=domain, + mastered_concepts=sorted(state.mastered_concepts), + weak_dimensions_by_concept=weak, + evaluator_summary_by_concept=summaries, + artifacts=list(state.artifacts), + ) + + +def export_capability_profile_json(profile: CapabilityProfile, path: str) -> None: + Path(path).write_text(json.dumps(asdict(profile), indent=2), encoding="utf-8") + + +def export_capability_report_markdown(profile: CapabilityProfile, path: str) -> None: + lines = [ + f"# Capability Profile: {profile.display_name}", + "", + f"- Learner ID: `{profile.learner_id}`", + f"- Domain: `{profile.domain}`", + "", + "## Mastered Concepts", + ] + if profile.mastered_concepts: + lines.extend([f"- {c}" for c in profile.mastered_concepts]) + else: + lines.append("- none") + lines.extend(["", "## Concept Summaries"]) + if profile.evaluator_summary_by_concept: + for concept, dims in sorted(profile.evaluator_summary_by_concept.items()): + lines.append(f"### {concept}") + if dims: + for dim, score in sorted(dims.items()): + lines.append(f"- {dim}: {score:.2f}") + weak = profile.weak_dimensions_by_concept.get(concept, []) + lines.append(f"- weak dimensions: {', '.join(weak) if weak else 'none'}") + lines.append("") + else: + lines.append("- none") + lines.extend(["## Artifacts"]) + if profile.artifacts: + for art in profile.artifacts: + lines.append(f"- {art['artifact_name']} ({art['artifact_type']}) for {art['concept']}") + else: + lines.append("- none") + Path(path).write_text("\n".join(lines), encoding="utf-8") + + +def export_artifact_manifest(profile: CapabilityProfile, path: str) -> None: + manifest = { + "learner_id": profile.learner_id, + "domain": profile.domain, + "artifacts": profile.artifacts, + } + Path(path).write_text(json.dumps(manifest, indent=2), encoding="utf-8") diff --git 
diff --git a/tests/test_mastery_ledger.py b/tests/test_mastery_ledger.py
new file mode 100644
index 0000000..c68c8c5
--- /dev/null
+++ b/tests/test_mastery_ledger.py
@@ -0,0 +1,43 @@
+from pathlib import Path
+import json
+
+from didactopus.agentic_loop import run_demo_agentic_loop
+from didactopus.mastery_ledger import (
+    build_capability_profile,
+    export_capability_profile_json,
+    export_capability_report_markdown,
+    export_artifact_manifest,
+)
+
+
+def test_build_capability_profile() -> None:
+    state = run_demo_agentic_loop([
+        "foundations-statistics::descriptive-statistics",
+        "bayes-extension::prior",
+    ])
+    profile = build_capability_profile(state, "Bayesian inference")
+    assert profile.domain == "Bayesian inference"
+    assert len(profile.artifacts) == 2
+
+
+def test_exports(tmp_path: Path) -> None:
+    state = run_demo_agentic_loop([
+        "foundations-statistics::descriptive-statistics",
+        "bayes-extension::prior",
+    ])
+    profile = build_capability_profile(state, "Bayesian inference")
+
+    json_path = tmp_path / "capability_profile.json"
+    md_path = tmp_path / "capability_report.md"
+    manifest_path = tmp_path / "artifact_manifest.json"
+
+    export_capability_profile_json(profile, str(json_path))
+    export_capability_report_markdown(profile, str(md_path))
+    export_artifact_manifest(profile, str(manifest_path))
+
+    assert json_path.exists()
+    assert md_path.exists()
+    assert manifest_path.exists()
+
+    data = json.loads(json_path.read_text(encoding="utf-8"))
+    assert data["domain"] == "Bayesian inference"
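+
+
+def test_capability_report_markdown_smoke(tmp_path: Path) -> None:
+    # Smoke test (an illustrative sketch): the Markdown report should lead
+    # with the learner's display name, include an Artifacts section, and
+    # reflect the demo's mastered concept.
+    state = run_demo_agentic_loop([
+        "foundations-statistics::descriptive-statistics",
+    ])
+    profile = build_capability_profile(state, "Bayesian inference")
+    md_path = tmp_path / "capability_report.md"
+    export_capability_report_markdown(profile, str(md_path))
+    text = md_path.read_text(encoding="utf-8")
+    assert text.startswith("# Capability Profile: Demo Agentic Student")
+    assert "## Artifacts" in text
+    assert "foundations-statistics::descriptive-statistics" in profile.mastered_concepts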