Multidimensional mastery additions
This commit is contained in:
parent
d682d2774f
commit
4dcca8bc3f
26
README.md
26
README.md
|
|
@ -2,21 +2,25 @@
|
|||
|
||||
**Didactopus** is a local-first AI-assisted autodidactic mastery platform.
|
||||
|
||||
This revision upgrades the evidence layer from simple averaging to a more realistic weighted and recency-aware mastery model.
|
||||
This revision upgrades the evidence layer from a single weighted score to a **multi-dimensional mastery model**.
|
||||
|
||||
## Added in this revision
|
||||
|
||||
- evidence-type weighting
|
||||
- recency weighting
|
||||
- confidence estimation from weighted evidence mass
|
||||
- dimension-level rubric storage
|
||||
- weighted concept summaries
|
||||
- mastery decisions using weighted score and confidence
|
||||
- resurfacing from recent weak evidence
|
||||
- tests for weighted scoring and recency behavior
|
||||
- per-concept mastery dimensions:
|
||||
- correctness
|
||||
- explanation
|
||||
- transfer
|
||||
- project_execution
|
||||
- critique
|
||||
- weighted, recency-aware dimension summaries
|
||||
- per-dimension mastery thresholds
|
||||
- concept-level mastery determined from all required dimensions
|
||||
- dimension-specific weakness reporting
|
||||
- adaptive next-step selection informed by weak dimensions
|
||||
- tests for multi-dimensional mastery promotion and partial weakness detection
|
||||
|
||||
## Why this matters
|
||||
|
||||
Not all evidence should count equally.
|
||||
Real mastery is not one scalar.
|
||||
|
||||
A capstone project or transfer task should usually matter more than a short explanation, and recent poor performance should sometimes matter more than older success. This revision begins to model that explicitly.
|
||||
A learner can be strong at routine correctness and still be weak at transfer, explanation, or critique. This revision lets Didactopus represent that distinction explicitly.
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@ platform:
|
|||
verification_required: true
|
||||
require_learner_explanations: true
|
||||
permit_direct_answers: false
|
||||
mastery_threshold: 0.8
|
||||
resurfacing_threshold: 0.55
|
||||
confidence_threshold: 0.8
|
||||
evidence_weights:
|
||||
|
|
@ -23,6 +22,12 @@ platform:
|
|||
project: 2.5
|
||||
transfer: 2.0
|
||||
recent_evidence_multiplier: 1.35
|
||||
dimension_thresholds:
|
||||
correctness: 0.8
|
||||
explanation: 0.75
|
||||
transfer: 0.7
|
||||
project_execution: 0.75
|
||||
critique: 0.7
|
||||
|
||||
artifacts:
|
||||
local_pack_dirs:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,33 @@
|
|||
# Multi-Dimensional Mastery Model
|
||||
|
||||
## Purpose
|
||||
|
||||
Didactopus should distinguish among different forms of competence rather than collapsing them into one number.
|
||||
|
||||
## Dimensions in this revision
|
||||
|
||||
- **correctness**: routine technical correctness
|
||||
- **explanation**: ability to explain clearly and justify reasoning
|
||||
- **transfer**: ability to apply knowledge in new contexts
|
||||
- **project_execution**: ability to carry work through in an authentic task
|
||||
- **critique**: ability to detect flaws and evaluate reasoning
|
||||
|
||||
## Current rule
|
||||
|
||||
A concept counts as mastered only if:
|
||||
- confidence meets the configured `confidence_threshold`
|
||||
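- every configured dimension that has evidence for that concept meets its configured threshold (dimensions with no evidence are not checked, but at least one dimension must have evidence)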
|
||||
|
||||
## Consequences
|
||||
|
||||
A learner can now be:
|
||||
- **ready in correctness but weak in transfer**
|
||||
- **strong in explanations but weak in project execution**
|
||||
- **mastered overall only when all required dimensions are adequate**
|
||||
|
||||
## Future work
|
||||
|
||||
- concept-specific dimension requirements
|
||||
- different thresholds by domain
|
||||
- prerequisite softening based on partial dimension mastery
|
||||
- deliberate weak-area practice generation
|
||||
|
|
@ -7,14 +7,7 @@ import yaml
|
|||
import networkx as nx
|
||||
|
||||
from . import __version__ as DIDACTOPUS_VERSION
|
||||
from .artifact_schemas import (
|
||||
ConceptsFile,
|
||||
PackManifest,
|
||||
ProjectsFile,
|
||||
RoadmapFile,
|
||||
RubricsFile,
|
||||
validate_top_level_key,
|
||||
)
|
||||
from .artifact_schemas import ConceptsFile, PackManifest, ProjectsFile, RoadmapFile, RubricsFile
|
||||
|
||||
REQUIRED_FILES = ["pack.yaml", "concepts.yaml", "roadmap.yaml", "projects.yaml", "rubrics.yaml"]
|
||||
|
||||
|
|
@ -48,13 +41,11 @@ def _load_yaml(path: Path) -> dict[str, Any]:
|
|||
def validate_pack(pack_dir: str | Path) -> PackValidationResult:
|
||||
pack_path = Path(pack_dir)
|
||||
result = PackValidationResult(pack_dir=pack_path)
|
||||
|
||||
for filename in REQUIRED_FILES:
|
||||
if not (pack_path / filename).exists():
|
||||
result.errors.append(f"missing required file: {filename}")
|
||||
if result.errors:
|
||||
return result
|
||||
|
||||
try:
|
||||
result.manifest = PackManifest.model_validate(_load_yaml(pack_path / "pack.yaml"))
|
||||
if not _version_in_range(DIDACTOPUS_VERSION, result.manifest.didactopus_min_version, result.manifest.didactopus_max_version):
|
||||
|
|
@ -62,19 +53,12 @@ def validate_pack(pack_dir: str | Path) -> PackValidationResult:
|
|||
f"incompatible with Didactopus core version {DIDACTOPUS_VERSION}; supported range is "
|
||||
f"{result.manifest.didactopus_min_version}..{result.manifest.didactopus_max_version}"
|
||||
)
|
||||
|
||||
concepts = ConceptsFile.model_validate(_load_yaml(pack_path / "concepts.yaml"))
|
||||
roadmap = RoadmapFile.model_validate(_load_yaml(pack_path / "roadmap.yaml"))
|
||||
projects = ProjectsFile.model_validate(_load_yaml(pack_path / "projects.yaml"))
|
||||
rubrics = RubricsFile.model_validate(_load_yaml(pack_path / "rubrics.yaml"))
|
||||
|
||||
result.loaded_files["concepts"] = concepts
|
||||
result.loaded_files["roadmap"] = roadmap
|
||||
result.loaded_files["projects"] = projects
|
||||
result.loaded_files["rubrics"] = rubrics
|
||||
result.loaded_files["concepts"] = ConceptsFile.model_validate(_load_yaml(pack_path / "concepts.yaml"))
|
||||
result.loaded_files["roadmap"] = RoadmapFile.model_validate(_load_yaml(pack_path / "roadmap.yaml"))
|
||||
result.loaded_files["projects"] = ProjectsFile.model_validate(_load_yaml(pack_path / "projects.yaml"))
|
||||
result.loaded_files["rubrics"] = RubricsFile.model_validate(_load_yaml(pack_path / "rubrics.yaml"))
|
||||
except Exception as exc:
|
||||
result.errors.append(str(exc))
|
||||
|
||||
result.is_valid = not result.errors
|
||||
return result
|
||||
|
||||
|
|
|
|||
|
|
@ -56,15 +56,5 @@ class ProjectsFile(BaseModel):
|
|||
projects: list[ProjectEntry]
|
||||
|
||||
|
||||
class RubricEntry(BaseModel):
|
||||
id: str
|
||||
title: str
|
||||
criteria: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class RubricsFile(BaseModel):
|
||||
rubrics: list[RubricEntry]
|
||||
|
||||
|
||||
def validate_top_level_key(data: dict[str, Any], required_key: str) -> list[str]:
|
||||
return [] if required_key in data else [f"missing required top-level key: {required_key}"]
|
||||
rubrics: list[dict[str, Any]]
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@ class PlatformConfig(BaseModel):
|
|||
verification_required: bool = True
|
||||
require_learner_explanations: bool = True
|
||||
permit_direct_answers: bool = False
|
||||
mastery_threshold: float = 0.8
|
||||
resurfacing_threshold: float = 0.55
|
||||
confidence_threshold: float = 0.8
|
||||
evidence_weights: dict[str, float] = Field(
|
||||
|
|
@ -38,6 +37,15 @@ class PlatformConfig(BaseModel):
|
|||
}
|
||||
)
|
||||
recent_evidence_multiplier: float = 1.35
|
||||
dimension_thresholds: dict[str, float] = Field(
|
||||
default_factory=lambda: {
|
||||
"correctness": 0.8,
|
||||
"explanation": 0.75,
|
||||
"transfer": 0.7,
|
||||
"project_execution": 0.75,
|
||||
"critique": 0.7,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
class ArtifactConfig(BaseModel):
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from typing import Literal
|
|||
from .adaptive_engine import LearnerProfile
|
||||
|
||||
EvidenceType = Literal["explanation", "problem", "project", "transfer"]
|
||||
MASTERY_DIMENSIONS = ["correctness", "explanation", "transfer", "project_execution", "critique"]
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -26,6 +27,8 @@ class ConceptEvidenceSummary:
|
|||
total_weight: float = 0.0
|
||||
confidence: float = 0.0
|
||||
dimension_means: dict[str, float] = field(default_factory=dict)
|
||||
weak_dimensions: list[str] = field(default_factory=list)
|
||||
mastered: bool = False
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -53,11 +56,13 @@ def recompute_concept_summary(
|
|||
items: list[EvidenceItem],
|
||||
type_weights: dict[str, float],
|
||||
recent_multiplier: float,
|
||||
dimension_thresholds: dict[str, float],
|
||||
confidence_threshold: float,
|
||||
) -> ConceptEvidenceSummary:
|
||||
weighted_score_sum = 0.0
|
||||
total_weight = 0.0
|
||||
dimension_totals: dict[str, float] = {}
|
||||
dimension_weights: dict[str, float] = {}
|
||||
dim_totals: dict[str, float] = {}
|
||||
dim_weights: dict[str, float] = {}
|
||||
|
||||
for item in items:
|
||||
item.score = clamp_score(item.score)
|
||||
|
|
@ -65,24 +70,42 @@ def recompute_concept_summary(
|
|||
weighted_score_sum += item.score * w
|
||||
total_weight += w
|
||||
|
||||
for dim, val in item.rubric_dimensions.items():
|
||||
v = clamp_score(val)
|
||||
dimension_totals[dim] = dimension_totals.get(dim, 0.0) + v * w
|
||||
dimension_weights[dim] = dimension_weights.get(dim, 0.0) + w
|
||||
for dim, value in item.rubric_dimensions.items():
|
||||
v = clamp_score(value)
|
||||
dim_totals[dim] = dim_totals.get(dim, 0.0) + v * w
|
||||
dim_weights[dim] = dim_weights.get(dim, 0.0) + w
|
||||
|
||||
dimension_means = {
|
||||
dim: (dimension_totals[dim] / dimension_weights[dim])
|
||||
for dim in dimension_totals
|
||||
if dimension_weights[dim] > 0
|
||||
dim: dim_totals[dim] / dim_weights[dim]
|
||||
for dim in dim_totals
|
||||
if dim_weights[dim] > 0
|
||||
}
|
||||
confidence = confidence_from_weight(total_weight)
|
||||
|
||||
weak_dimensions = []
|
||||
for dim, threshold in dimension_thresholds.items():
|
||||
if dim in dimension_means and dimension_means[dim] < threshold:
|
||||
weak_dimensions.append(dim)
|
||||
|
||||
mastered = (
|
||||
confidence >= confidence_threshold
|
||||
and all(
|
||||
(dim in dimension_means and dimension_means[dim] >= threshold)
|
||||
for dim, threshold in dimension_thresholds.items()
|
||||
if dim in dimension_means
|
||||
)
|
||||
and len(dimension_means) > 0
|
||||
)
|
||||
|
||||
return ConceptEvidenceSummary(
|
||||
concept_key=concept_key,
|
||||
count=len(items),
|
||||
weighted_mean_score=(weighted_score_sum / total_weight) if total_weight > 0 else 0.0,
|
||||
total_weight=total_weight,
|
||||
confidence=confidence_from_weight(total_weight),
|
||||
confidence=confidence,
|
||||
dimension_means=dimension_means,
|
||||
weak_dimensions=sorted(weak_dimensions),
|
||||
mastered=mastered,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -91,6 +114,8 @@ def add_evidence_item(
|
|||
item: EvidenceItem,
|
||||
type_weights: dict[str, float],
|
||||
recent_multiplier: float,
|
||||
dimension_thresholds: dict[str, float],
|
||||
confidence_threshold: float,
|
||||
) -> None:
|
||||
item.score = clamp_score(item.score)
|
||||
state.evidence_by_concept.setdefault(item.concept_key, []).append(item)
|
||||
|
|
@ -99,30 +124,21 @@ def add_evidence_item(
|
|||
state.evidence_by_concept[item.concept_key],
|
||||
type_weights,
|
||||
recent_multiplier,
|
||||
dimension_thresholds,
|
||||
confidence_threshold,
|
||||
)
|
||||
|
||||
|
||||
def update_profile_mastery_from_evidence(
|
||||
profile: LearnerProfile,
|
||||
state: EvidenceState,
|
||||
mastery_threshold: float,
|
||||
resurfacing_threshold: float,
|
||||
confidence_threshold: float,
|
||||
) -> None:
|
||||
for concept_key, summary in state.summary_by_concept.items():
|
||||
if summary.count == 0:
|
||||
continue
|
||||
|
||||
if (
|
||||
summary.weighted_mean_score >= mastery_threshold
|
||||
and summary.confidence >= confidence_threshold
|
||||
):
|
||||
if summary.mastered:
|
||||
profile.mastered_concepts.add(concept_key)
|
||||
state.resurfaced_concepts.discard(concept_key)
|
||||
elif (
|
||||
concept_key in profile.mastered_concepts
|
||||
and summary.weighted_mean_score < resurfacing_threshold
|
||||
):
|
||||
elif concept_key in profile.mastered_concepts and summary.weighted_mean_score < resurfacing_threshold:
|
||||
profile.mastered_concepts.remove(concept_key)
|
||||
state.resurfaced_concepts.add(concept_key)
|
||||
|
||||
|
|
@ -130,20 +146,25 @@ def update_profile_mastery_from_evidence(
|
|||
def ingest_evidence_bundle(
|
||||
profile: LearnerProfile,
|
||||
items: list[EvidenceItem],
|
||||
mastery_threshold: float,
|
||||
resurfacing_threshold: float,
|
||||
confidence_threshold: float,
|
||||
type_weights: dict[str, float],
|
||||
recent_multiplier: float,
|
||||
dimension_thresholds: dict[str, float],
|
||||
) -> EvidenceState:
|
||||
state = EvidenceState()
|
||||
for item in items:
|
||||
add_evidence_item(state, item, type_weights, recent_multiplier)
|
||||
add_evidence_item(
|
||||
state,
|
||||
item,
|
||||
type_weights,
|
||||
recent_multiplier,
|
||||
dimension_thresholds,
|
||||
confidence_threshold,
|
||||
)
|
||||
update_profile_mastery_from_evidence(
|
||||
profile=profile,
|
||||
state=state,
|
||||
mastery_threshold=mastery_threshold,
|
||||
resurfacing_threshold=resurfacing_threshold,
|
||||
confidence_threshold=confidence_threshold,
|
||||
)
|
||||
)
|
||||
return state
|
||||
|
|
|
|||
|
|
@ -26,8 +26,7 @@ def build_merged_learning_graph(results: list[PackValidationResult]) -> MergedLe
|
|||
|
||||
for pack_name in merged.load_order:
|
||||
result = valid[pack_name]
|
||||
concepts_file = result.loaded_files["concepts"]
|
||||
for concept in concepts_file.concepts:
|
||||
for concept in result.loaded_files["concepts"].concepts:
|
||||
key = namespaced_concept(pack_name, concept.id)
|
||||
merged.concept_data[key] = {
|
||||
"id": concept.id,
|
||||
|
|
@ -40,16 +39,13 @@ def build_merged_learning_graph(results: list[PackValidationResult]) -> MergedLe
|
|||
|
||||
for pack_name in merged.load_order:
|
||||
result = valid[pack_name]
|
||||
concepts_file = result.loaded_files["concepts"]
|
||||
for concept in concepts_file.concepts:
|
||||
for concept in result.loaded_files["concepts"].concepts:
|
||||
concept_key = namespaced_concept(pack_name, concept.id)
|
||||
for prereq in concept.prerequisites:
|
||||
prereq_key = namespaced_concept(pack_name, prereq)
|
||||
if prereq_key in merged.graph:
|
||||
merged.graph.add_edge(prereq_key, concept_key)
|
||||
|
||||
projects_file = result.loaded_files["projects"]
|
||||
for project in projects_file.projects:
|
||||
for project in result.loaded_files["projects"].projects:
|
||||
merged.project_catalog.append({
|
||||
"id": f"{pack_name}::{project.id}",
|
||||
"pack": pack_name,
|
||||
|
|
@ -58,5 +54,4 @@ def build_merged_learning_graph(results: list[PackValidationResult]) -> MergedLe
|
|||
"prerequisites": [namespaced_concept(pack_name, p) for p in project.prerequisites],
|
||||
"deliverables": list(project.deliverables),
|
||||
})
|
||||
|
||||
return merged
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ from .artifact_registry import (
|
|||
)
|
||||
from .config import load_config
|
||||
from .evidence_engine import EvidenceItem, ingest_evidence_bundle
|
||||
from .evaluation import score_simple_rubric
|
||||
from .learning_graph import build_merged_learning_graph
|
||||
from .mentor import generate_socratic_prompt
|
||||
from .model_provider import ModelProvider
|
||||
|
|
@ -20,7 +19,7 @@ from .project_advisor import suggest_capstone
|
|||
|
||||
|
||||
def build_parser() -> argparse.ArgumentParser:
|
||||
parser = argparse.ArgumentParser(description="Didactopus weighted evidence scaffold")
|
||||
parser = argparse.ArgumentParser(description="Didactopus multi-dimensional mastery scaffold")
|
||||
parser.add_argument("--domain", required=True)
|
||||
parser.add_argument("--goal", required=True)
|
||||
parser.add_argument(
|
||||
|
|
@ -68,88 +67,67 @@ def main() -> None:
|
|||
hide_mastered=True,
|
||||
)
|
||||
|
||||
rubric = score_simple_rubric(0.92, 0.86, 0.82, 0.78)
|
||||
evidence_items = [
|
||||
EvidenceItem(
|
||||
concept_key="foundations-statistics::descriptive-statistics",
|
||||
evidence_type="explanation",
|
||||
score=rubric.mean(),
|
||||
is_recent=False,
|
||||
rubric_dimensions={
|
||||
"correctness": rubric.correctness,
|
||||
"clarity": rubric.clarity,
|
||||
"justification": rubric.justification,
|
||||
"transfer": rubric.transfer,
|
||||
},
|
||||
notes="Good explanation.",
|
||||
),
|
||||
EvidenceItem(
|
||||
concept_key="foundations-statistics::descriptive-statistics",
|
||||
evidence_type="project",
|
||||
score=0.9,
|
||||
score=0.88,
|
||||
is_recent=True,
|
||||
rubric_dimensions={
|
||||
"correctness": 0.9,
|
||||
"clarity": 0.84,
|
||||
"justification": 0.88,
|
||||
"transfer": 0.82,
|
||||
"explanation": 0.83,
|
||||
"transfer": 0.79,
|
||||
"project_execution": 0.88,
|
||||
"critique": 0.74,
|
||||
},
|
||||
notes="Strong project evidence.",
|
||||
notes="Strong integrated performance.",
|
||||
),
|
||||
EvidenceItem(
|
||||
concept_key="bayes-extension::prior",
|
||||
evidence_type="problem",
|
||||
score=0.58,
|
||||
score=0.68,
|
||||
is_recent=True,
|
||||
rubric_dimensions={
|
||||
"correctness": 0.6,
|
||||
"clarity": 0.55,
|
||||
"correctness": 0.75,
|
||||
"explanation": 0.62,
|
||||
"transfer": 0.55,
|
||||
"critique": 0.58,
|
||||
},
|
||||
notes="Recent weak but informative performance.",
|
||||
notes="Knows some basics, weak transfer and critique.",
|
||||
),
|
||||
]
|
||||
|
||||
evidence_state = ingest_evidence_bundle(
|
||||
profile=profile,
|
||||
items=evidence_items,
|
||||
mastery_threshold=config.platform.mastery_threshold,
|
||||
resurfacing_threshold=config.platform.resurfacing_threshold,
|
||||
confidence_threshold=config.platform.confidence_threshold,
|
||||
type_weights=config.platform.evidence_weights,
|
||||
recent_multiplier=config.platform.recent_evidence_multiplier,
|
||||
dimension_thresholds=config.platform.dimension_thresholds,
|
||||
)
|
||||
|
||||
plan = build_adaptive_plan(merged, profile)
|
||||
|
||||
print("== Weighted Evidence Summary ==")
|
||||
print("== Multi-Dimensional Evidence Summary ==")
|
||||
for concept_key, summary in evidence_state.summary_by_concept.items():
|
||||
print(
|
||||
f"- {concept_key}: count={summary.count}, "
|
||||
f"weighted_mean={summary.weighted_mean_score:.2f}, "
|
||||
f"confidence={summary.confidence:.2f}, "
|
||||
f"total_weight={summary.total_weight:.2f}"
|
||||
f"- {concept_key}: weighted_mean={summary.weighted_mean_score:.2f}, "
|
||||
f"confidence={summary.confidence:.2f}, mastered={summary.mastered}"
|
||||
)
|
||||
if summary.dimension_means:
|
||||
dims = ", ".join(f"{k}={v:.2f}" for k, v in sorted(summary.dimension_means.items()))
|
||||
print(f" * dimensions: {dims}")
|
||||
if summary.weak_dimensions:
|
||||
print(f" * weak dimensions: {', '.join(summary.weak_dimensions)}")
|
||||
print()
|
||||
|
||||
print("== Mastered Concepts After Weighted Evidence ==")
|
||||
for concept_key in sorted(profile.mastered_concepts):
|
||||
print(f"- {concept_key}")
|
||||
print()
|
||||
|
||||
print("== Resurfaced Concepts ==")
|
||||
if evidence_state.resurfaced_concepts:
|
||||
for concept_key in sorted(evidence_state.resurfaced_concepts):
|
||||
print("== Mastered Concepts ==")
|
||||
if profile.mastered_concepts:
|
||||
for concept_key in sorted(profile.mastered_concepts):
|
||||
print(f"- {concept_key}")
|
||||
else:
|
||||
print("- none")
|
||||
print()
|
||||
|
||||
print("== Adaptive Plan Summary ==")
|
||||
print(f"- roadmap items visible: {len(plan.learner_roadmap)}")
|
||||
print(f"- next-best concepts: {len(plan.next_best_concepts)}")
|
||||
print(f"- eligible projects: {len(plan.eligible_projects)}")
|
||||
print("- none yet")
|
||||
print()
|
||||
|
||||
print("== Next Best Concepts ==")
|
||||
|
|
@ -157,7 +135,8 @@ def main() -> None:
|
|||
print(f"- {concept}")
|
||||
print()
|
||||
|
||||
focus_concept = plan.next_best_concepts[0] if plan.next_best_concepts else args.domain
|
||||
print(generate_socratic_prompt(provider, focus_concept))
|
||||
print(generate_practice_task(provider, focus_concept))
|
||||
focus_concept = "bayes-extension::prior"
|
||||
weak_dims = evidence_state.summary_by_concept.get(focus_concept).weak_dimensions if focus_concept in evidence_state.summary_by_concept else []
|
||||
print(generate_socratic_prompt(provider, focus_concept, weak_dims))
|
||||
print(generate_practice_task(provider, focus_concept, weak_dims))
|
||||
print(suggest_capstone(provider, args.domain))
|
||||
|
|
|
|||
|
|
@ -1,7 +1,10 @@
|
|||
from .model_provider import ModelProvider
|
||||
|
||||
|
||||
def generate_socratic_prompt(provider: ModelProvider, concept: str, weak_dimensions: list[str] | None = None) -> str:
    """Ask the provider for one Socratic question about ``concept``.

    Args:
        provider: Object whose ``generate(prompt)`` result exposes ``.text``.
        concept: Concept identifier interpolated into the prompt.
        weak_dimensions: Optional dimension names to emphasise. ``None`` or an
            empty list leaves the base prompt unchanged.

    Returns:
        The ``text`` attribute of the provider's response.
    """
    weak_text = ""
    if weak_dimensions:
        # Appended directly after the base sentence, hence the leading space.
        weak_text = f" Focus especially on weak dimensions: {', '.join(weak_dimensions)}."
    return provider.generate(
        f"You are a Socratic mentor. Ask one probing question about '{concept}'.{weak_text}"
    ).text
|
||||
|
|
|
|||
|
|
@ -1,7 +1,10 @@
|
|||
from .model_provider import ModelProvider
|
||||
|
||||
|
||||
def generate_practice_task(provider: ModelProvider, concept: str, weak_dimensions: list[str] | None = None) -> str:
    """Ask the provider for one reasoning-heavy practice task on ``concept``.

    Args:
        provider: Object whose ``generate(prompt)`` result exposes ``.text``.
        concept: Concept identifier interpolated into the prompt.
        weak_dimensions: Optional dimension names the task should target.
            ``None`` or an empty list leaves the base prompt unchanged.

    Returns:
        The ``text`` attribute of the provider's response.
    """
    weak_text = ""
    if weak_dimensions:
        # Appended directly after the base sentence, hence the leading space.
        weak_text = f" Target the weak dimensions: {', '.join(weak_dimensions)}."
    return provider.generate(
        f"Generate one reasoning-heavy practice task for '{concept}'.{weak_text}"
    ).text
|
||||
|
|
|
|||
|
|
@ -4,5 +4,5 @@ from didactopus.config import load_config
|
|||
|
||||
def test_load_example_config() -> None:
|
||||
config = load_config(Path("configs/config.example.yaml"))
|
||||
assert config.platform.evidence_weights["project"] == 2.5
|
||||
assert config.platform.recent_evidence_multiplier == 1.35
|
||||
assert config.platform.dimension_thresholds["transfer"] == 0.7
|
||||
assert config.platform.confidence_threshold == 0.8
|
||||
|
|
|
|||
|
|
@ -0,0 +1,100 @@
|
|||
from didactopus.adaptive_engine import LearnerProfile
|
||||
from didactopus.evidence_engine import EvidenceItem, ingest_evidence_bundle
|
||||
|
||||
|
||||
# Evidence-type weights passed to ingest_evidence_bundle as ``type_weights``.
DEFAULT_WEIGHTS = {"explanation": 1.0, "problem": 1.5, "project": 2.5, "transfer": 2.0}

# Per-dimension mastery thresholds passed as ``dimension_thresholds``.
DEFAULT_THRESHOLDS = {
    "correctness": 0.8,
    "explanation": 0.75,
    "transfer": 0.7,
    "project_execution": 0.75,
    "critique": 0.7,
}
|
||||
|
||||
|
||||
def test_full_multidim_mastery() -> None:
    """A recent project item whose five dimension scores all clear DEFAULT_THRESHOLDS should be mastered."""
    profile = LearnerProfile(learner_id="u1")
    state = ingest_evidence_bundle(
        profile,
        [
            EvidenceItem(
                "c1",
                "project",
                0.9,
                is_recent=True,
                rubric_dimensions={
                    "correctness": 0.88,
                    "explanation": 0.82,
                    "transfer": 0.77,
                    "project_execution": 0.9,
                    "critique": 0.76,
                },
            )
        ],
        resurfacing_threshold=0.55,
        confidence_threshold=0.75,
        type_weights=DEFAULT_WEIGHTS,
        recent_multiplier=1.35,
        dimension_thresholds=DEFAULT_THRESHOLDS,
    )
    assert "c1" in profile.mastered_concepts
    assert state.summary_by_concept["c1"].mastered is True
    assert state.summary_by_concept["c1"].weak_dimensions == []
|
||||
|
||||
|
||||
def test_partial_weakness_blocks_mastery() -> None:
    """A transfer score (0.52) below its 0.7 threshold should block mastery and be reported as weak."""
    profile = LearnerProfile(learner_id="u1")
    state = ingest_evidence_bundle(
        profile,
        [
            EvidenceItem(
                "c1",
                "project",
                0.85,
                is_recent=True,
                rubric_dimensions={
                    "correctness": 0.9,
                    "explanation": 0.86,
                    "transfer": 0.52,
                    "project_execution": 0.88,
                    "critique": 0.8,
                },
            )
        ],
        resurfacing_threshold=0.55,
        confidence_threshold=0.75,
        type_weights=DEFAULT_WEIGHTS,
        recent_multiplier=1.35,
        dimension_thresholds=DEFAULT_THRESHOLDS,
    )
    assert "c1" not in profile.mastered_concepts
    assert state.summary_by_concept["c1"].mastered is False
    assert "transfer" in state.summary_by_concept["c1"].weak_dimensions
|
||||
|
||||
|
||||
def test_resurfacing_from_multidim_weakness() -> None:
    """A previously mastered concept should be un-mastered and resurfaced after a recent 0.45 problem score."""
    # The profile starts with "c1" already mastered so the resurfacing path is exercised.
    profile = LearnerProfile(learner_id="u1", mastered_concepts={"c1"})
    state = ingest_evidence_bundle(
        profile,
        [
            EvidenceItem(
                "c1",
                "problem",
                0.45,
                is_recent=True,
                rubric_dimensions={
                    "correctness": 0.45,
                    "explanation": 0.5,
                    "transfer": 0.4,
                    "critique": 0.42,
                },
            )
        ],
        resurfacing_threshold=0.55,
        confidence_threshold=0.75,
        type_weights=DEFAULT_WEIGHTS,
        recent_multiplier=1.35,
        dimension_thresholds=DEFAULT_THRESHOLDS,
    )
    assert "c1" not in profile.mastered_concepts
    assert "c1" in state.resurfaced_concepts
|
||||
Loading…
Reference in New Issue