Multidimensional mastery additions
This commit is contained in:
parent
d682d2774f
commit
4dcca8bc3f
26
README.md
26
README.md
|
|
@ -2,21 +2,25 @@
|
||||||
|
|
||||||
**Didactopus** is a local-first AI-assisted autodidactic mastery platform.
|
**Didactopus** is a local-first AI-assisted autodidactic mastery platform.
|
||||||
|
|
||||||
This revision upgrades the evidence layer from simple averaging to a more realistic weighted and recency-aware mastery model.
|
This revision upgrades the evidence layer from a single weighted score to a **multi-dimensional mastery model**.
|
||||||
|
|
||||||
## Added in this revision
|
## Added in this revision
|
||||||
|
|
||||||
- evidence-type weighting
|
- per-concept mastery dimensions:
|
||||||
- recency weighting
|
- correctness
|
||||||
- confidence estimation from weighted evidence mass
|
- explanation
|
||||||
- dimension-level rubric storage
|
- transfer
|
||||||
- weighted concept summaries
|
- project_execution
|
||||||
- mastery decisions using weighted score and confidence
|
- critique
|
||||||
- resurfacing from recent weak evidence
|
- weighted, recency-aware dimension summaries
|
||||||
- tests for weighted scoring and recency behavior
|
- per-dimension mastery thresholds
|
||||||
|
- concept-level mastery determined from all required dimensions
|
||||||
|
- dimension-specific weakness reporting
|
||||||
|
- adaptive next-step selection informed by weak dimensions
|
||||||
|
- tests for multi-dimensional mastery promotion and partial weakness detection
|
||||||
|
|
||||||
## Why this matters
|
## Why this matters
|
||||||
|
|
||||||
Not all evidence should count equally.
|
Real mastery is not a single scalar.
|
||||||
|
|
||||||
A capstone project or transfer task should usually matter more than a short explanation, and recent poor performance should sometimes matter more than older success. This revision begins to model that explicitly.
|
A learner can be strong at routine correctness and still be weak at transfer, explanation, or critique. This revision lets Didactopus represent that distinction explicitly.
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,6 @@ platform:
|
||||||
verification_required: true
|
verification_required: true
|
||||||
require_learner_explanations: true
|
require_learner_explanations: true
|
||||||
permit_direct_answers: false
|
permit_direct_answers: false
|
||||||
mastery_threshold: 0.8
|
|
||||||
resurfacing_threshold: 0.55
|
resurfacing_threshold: 0.55
|
||||||
confidence_threshold: 0.8
|
confidence_threshold: 0.8
|
||||||
evidence_weights:
|
evidence_weights:
|
||||||
|
|
@ -23,6 +22,12 @@ platform:
|
||||||
project: 2.5
|
project: 2.5
|
||||||
transfer: 2.0
|
transfer: 2.0
|
||||||
recent_evidence_multiplier: 1.35
|
recent_evidence_multiplier: 1.35
|
||||||
|
dimension_thresholds:
|
||||||
|
correctness: 0.8
|
||||||
|
explanation: 0.75
|
||||||
|
transfer: 0.7
|
||||||
|
project_execution: 0.75
|
||||||
|
critique: 0.7
|
||||||
|
|
||||||
artifacts:
|
artifacts:
|
||||||
local_pack_dirs:
|
local_pack_dirs:
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,33 @@
|
||||||
|
# Multi-Dimensional Mastery Model
|
||||||
|
|
||||||
|
## Purpose
|
||||||
|
|
||||||
|
Didactopus should distinguish among different forms of competence rather than collapsing them into one number.
|
||||||
|
|
||||||
|
## Dimensions in this revision
|
||||||
|
|
||||||
|
- **correctness**: routine technical correctness
|
||||||
|
- **explanation**: ability to explain clearly and justify reasoning
|
||||||
|
- **transfer**: ability to apply knowledge in new contexts
|
||||||
|
- **project_execution**: ability to carry work through in an authentic task
|
||||||
|
- **critique**: ability to detect flaws and evaluate reasoning
|
||||||
|
|
||||||
|
## Current rule
|
||||||
|
|
||||||
|
A concept counts as mastered only if:
|
||||||
|
- confidence meets the configured `confidence_threshold`
|
||||||
|
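- every dimension that has both a configured threshold and recorded evidence for that concept meets its threshold (a dimension with no evidence yet is not checked, but at least one dimension must have evidence)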
|
||||||
|
|
||||||
|
## Consequences
|
||||||
|
|
||||||
|
A learner can now be:
|
||||||
|
- **ready in correctness but weak in transfer**
|
||||||
|
- **strong in explanations but weak in project execution**
|
||||||
|
- **mastered overall only when all required dimensions are adequate**
|
||||||
|
|
||||||
|
## Future work
|
||||||
|
|
||||||
|
- concept-specific dimension requirements
|
||||||
|
- different thresholds by domain
|
||||||
|
- prerequisite softening based on partial dimension mastery
|
||||||
|
- deliberate weak-area practice generation
|
||||||
|
|
@ -7,14 +7,7 @@ import yaml
|
||||||
import networkx as nx
|
import networkx as nx
|
||||||
|
|
||||||
from . import __version__ as DIDACTOPUS_VERSION
|
from . import __version__ as DIDACTOPUS_VERSION
|
||||||
from .artifact_schemas import (
|
from .artifact_schemas import ConceptsFile, PackManifest, ProjectsFile, RoadmapFile, RubricsFile
|
||||||
ConceptsFile,
|
|
||||||
PackManifest,
|
|
||||||
ProjectsFile,
|
|
||||||
RoadmapFile,
|
|
||||||
RubricsFile,
|
|
||||||
validate_top_level_key,
|
|
||||||
)
|
|
||||||
|
|
||||||
REQUIRED_FILES = ["pack.yaml", "concepts.yaml", "roadmap.yaml", "projects.yaml", "rubrics.yaml"]
|
REQUIRED_FILES = ["pack.yaml", "concepts.yaml", "roadmap.yaml", "projects.yaml", "rubrics.yaml"]
|
||||||
|
|
||||||
|
|
@ -48,13 +41,11 @@ def _load_yaml(path: Path) -> dict[str, Any]:
|
||||||
def validate_pack(pack_dir: str | Path) -> PackValidationResult:
|
def validate_pack(pack_dir: str | Path) -> PackValidationResult:
|
||||||
pack_path = Path(pack_dir)
|
pack_path = Path(pack_dir)
|
||||||
result = PackValidationResult(pack_dir=pack_path)
|
result = PackValidationResult(pack_dir=pack_path)
|
||||||
|
|
||||||
for filename in REQUIRED_FILES:
|
for filename in REQUIRED_FILES:
|
||||||
if not (pack_path / filename).exists():
|
if not (pack_path / filename).exists():
|
||||||
result.errors.append(f"missing required file: {filename}")
|
result.errors.append(f"missing required file: {filename}")
|
||||||
if result.errors:
|
if result.errors:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result.manifest = PackManifest.model_validate(_load_yaml(pack_path / "pack.yaml"))
|
result.manifest = PackManifest.model_validate(_load_yaml(pack_path / "pack.yaml"))
|
||||||
if not _version_in_range(DIDACTOPUS_VERSION, result.manifest.didactopus_min_version, result.manifest.didactopus_max_version):
|
if not _version_in_range(DIDACTOPUS_VERSION, result.manifest.didactopus_min_version, result.manifest.didactopus_max_version):
|
||||||
|
|
@ -62,19 +53,12 @@ def validate_pack(pack_dir: str | Path) -> PackValidationResult:
|
||||||
f"incompatible with Didactopus core version {DIDACTOPUS_VERSION}; supported range is "
|
f"incompatible with Didactopus core version {DIDACTOPUS_VERSION}; supported range is "
|
||||||
f"{result.manifest.didactopus_min_version}..{result.manifest.didactopus_max_version}"
|
f"{result.manifest.didactopus_min_version}..{result.manifest.didactopus_max_version}"
|
||||||
)
|
)
|
||||||
|
result.loaded_files["concepts"] = ConceptsFile.model_validate(_load_yaml(pack_path / "concepts.yaml"))
|
||||||
concepts = ConceptsFile.model_validate(_load_yaml(pack_path / "concepts.yaml"))
|
result.loaded_files["roadmap"] = RoadmapFile.model_validate(_load_yaml(pack_path / "roadmap.yaml"))
|
||||||
roadmap = RoadmapFile.model_validate(_load_yaml(pack_path / "roadmap.yaml"))
|
result.loaded_files["projects"] = ProjectsFile.model_validate(_load_yaml(pack_path / "projects.yaml"))
|
||||||
projects = ProjectsFile.model_validate(_load_yaml(pack_path / "projects.yaml"))
|
result.loaded_files["rubrics"] = RubricsFile.model_validate(_load_yaml(pack_path / "rubrics.yaml"))
|
||||||
rubrics = RubricsFile.model_validate(_load_yaml(pack_path / "rubrics.yaml"))
|
|
||||||
|
|
||||||
result.loaded_files["concepts"] = concepts
|
|
||||||
result.loaded_files["roadmap"] = roadmap
|
|
||||||
result.loaded_files["projects"] = projects
|
|
||||||
result.loaded_files["rubrics"] = rubrics
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
result.errors.append(str(exc))
|
result.errors.append(str(exc))
|
||||||
|
|
||||||
result.is_valid = not result.errors
|
result.is_valid = not result.errors
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -56,15 +56,5 @@ class ProjectsFile(BaseModel):
|
||||||
projects: list[ProjectEntry]
|
projects: list[ProjectEntry]
|
||||||
|
|
||||||
|
|
||||||
class RubricEntry(BaseModel):
|
|
||||||
id: str
|
|
||||||
title: str
|
|
||||||
criteria: list[str] = Field(default_factory=list)
|
|
||||||
|
|
||||||
|
|
||||||
class RubricsFile(BaseModel):
|
class RubricsFile(BaseModel):
|
||||||
rubrics: list[RubricEntry]
|
rubrics: list[dict[str, Any]]
|
||||||
|
|
||||||
|
|
||||||
def validate_top_level_key(data: dict[str, Any], required_key: str) -> list[str]:
|
|
||||||
return [] if required_key in data else [f"missing required top-level key: {required_key}"]
|
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,6 @@ class PlatformConfig(BaseModel):
|
||||||
verification_required: bool = True
|
verification_required: bool = True
|
||||||
require_learner_explanations: bool = True
|
require_learner_explanations: bool = True
|
||||||
permit_direct_answers: bool = False
|
permit_direct_answers: bool = False
|
||||||
mastery_threshold: float = 0.8
|
|
||||||
resurfacing_threshold: float = 0.55
|
resurfacing_threshold: float = 0.55
|
||||||
confidence_threshold: float = 0.8
|
confidence_threshold: float = 0.8
|
||||||
evidence_weights: dict[str, float] = Field(
|
evidence_weights: dict[str, float] = Field(
|
||||||
|
|
@ -38,6 +37,15 @@ class PlatformConfig(BaseModel):
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
recent_evidence_multiplier: float = 1.35
|
recent_evidence_multiplier: float = 1.35
|
||||||
|
dimension_thresholds: dict[str, float] = Field(
|
||||||
|
default_factory=lambda: {
|
||||||
|
"correctness": 0.8,
|
||||||
|
"explanation": 0.75,
|
||||||
|
"transfer": 0.7,
|
||||||
|
"project_execution": 0.75,
|
||||||
|
"critique": 0.7,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ArtifactConfig(BaseModel):
|
class ArtifactConfig(BaseModel):
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ from typing import Literal
|
||||||
from .adaptive_engine import LearnerProfile
|
from .adaptive_engine import LearnerProfile
|
||||||
|
|
||||||
EvidenceType = Literal["explanation", "problem", "project", "transfer"]
|
EvidenceType = Literal["explanation", "problem", "project", "transfer"]
|
||||||
|
MASTERY_DIMENSIONS = ["correctness", "explanation", "transfer", "project_execution", "critique"]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -26,6 +27,8 @@ class ConceptEvidenceSummary:
|
||||||
total_weight: float = 0.0
|
total_weight: float = 0.0
|
||||||
confidence: float = 0.0
|
confidence: float = 0.0
|
||||||
dimension_means: dict[str, float] = field(default_factory=dict)
|
dimension_means: dict[str, float] = field(default_factory=dict)
|
||||||
|
weak_dimensions: list[str] = field(default_factory=list)
|
||||||
|
mastered: bool = False
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -53,11 +56,13 @@ def recompute_concept_summary(
|
||||||
items: list[EvidenceItem],
|
items: list[EvidenceItem],
|
||||||
type_weights: dict[str, float],
|
type_weights: dict[str, float],
|
||||||
recent_multiplier: float,
|
recent_multiplier: float,
|
||||||
|
dimension_thresholds: dict[str, float],
|
||||||
|
confidence_threshold: float,
|
||||||
) -> ConceptEvidenceSummary:
|
) -> ConceptEvidenceSummary:
|
||||||
weighted_score_sum = 0.0
|
weighted_score_sum = 0.0
|
||||||
total_weight = 0.0
|
total_weight = 0.0
|
||||||
dimension_totals: dict[str, float] = {}
|
dim_totals: dict[str, float] = {}
|
||||||
dimension_weights: dict[str, float] = {}
|
dim_weights: dict[str, float] = {}
|
||||||
|
|
||||||
for item in items:
|
for item in items:
|
||||||
item.score = clamp_score(item.score)
|
item.score = clamp_score(item.score)
|
||||||
|
|
@ -65,24 +70,42 @@ def recompute_concept_summary(
|
||||||
weighted_score_sum += item.score * w
|
weighted_score_sum += item.score * w
|
||||||
total_weight += w
|
total_weight += w
|
||||||
|
|
||||||
for dim, val in item.rubric_dimensions.items():
|
for dim, value in item.rubric_dimensions.items():
|
||||||
v = clamp_score(val)
|
v = clamp_score(value)
|
||||||
dimension_totals[dim] = dimension_totals.get(dim, 0.0) + v * w
|
dim_totals[dim] = dim_totals.get(dim, 0.0) + v * w
|
||||||
dimension_weights[dim] = dimension_weights.get(dim, 0.0) + w
|
dim_weights[dim] = dim_weights.get(dim, 0.0) + w
|
||||||
|
|
||||||
dimension_means = {
|
dimension_means = {
|
||||||
dim: (dimension_totals[dim] / dimension_weights[dim])
|
dim: dim_totals[dim] / dim_weights[dim]
|
||||||
for dim in dimension_totals
|
for dim in dim_totals
|
||||||
if dimension_weights[dim] > 0
|
if dim_weights[dim] > 0
|
||||||
}
|
}
|
||||||
|
confidence = confidence_from_weight(total_weight)
|
||||||
|
|
||||||
|
weak_dimensions = []
|
||||||
|
for dim, threshold in dimension_thresholds.items():
|
||||||
|
if dim in dimension_means and dimension_means[dim] < threshold:
|
||||||
|
weak_dimensions.append(dim)
|
||||||
|
|
||||||
|
mastered = (
|
||||||
|
confidence >= confidence_threshold
|
||||||
|
and all(
|
||||||
|
(dim in dimension_means and dimension_means[dim] >= threshold)
|
||||||
|
for dim, threshold in dimension_thresholds.items()
|
||||||
|
if dim in dimension_means
|
||||||
|
)
|
||||||
|
and len(dimension_means) > 0
|
||||||
|
)
|
||||||
|
|
||||||
return ConceptEvidenceSummary(
|
return ConceptEvidenceSummary(
|
||||||
concept_key=concept_key,
|
concept_key=concept_key,
|
||||||
count=len(items),
|
count=len(items),
|
||||||
weighted_mean_score=(weighted_score_sum / total_weight) if total_weight > 0 else 0.0,
|
weighted_mean_score=(weighted_score_sum / total_weight) if total_weight > 0 else 0.0,
|
||||||
total_weight=total_weight,
|
total_weight=total_weight,
|
||||||
confidence=confidence_from_weight(total_weight),
|
confidence=confidence,
|
||||||
dimension_means=dimension_means,
|
dimension_means=dimension_means,
|
||||||
|
weak_dimensions=sorted(weak_dimensions),
|
||||||
|
mastered=mastered,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -91,6 +114,8 @@ def add_evidence_item(
|
||||||
item: EvidenceItem,
|
item: EvidenceItem,
|
||||||
type_weights: dict[str, float],
|
type_weights: dict[str, float],
|
||||||
recent_multiplier: float,
|
recent_multiplier: float,
|
||||||
|
dimension_thresholds: dict[str, float],
|
||||||
|
confidence_threshold: float,
|
||||||
) -> None:
|
) -> None:
|
||||||
item.score = clamp_score(item.score)
|
item.score = clamp_score(item.score)
|
||||||
state.evidence_by_concept.setdefault(item.concept_key, []).append(item)
|
state.evidence_by_concept.setdefault(item.concept_key, []).append(item)
|
||||||
|
|
@ -99,30 +124,21 @@ def add_evidence_item(
|
||||||
state.evidence_by_concept[item.concept_key],
|
state.evidence_by_concept[item.concept_key],
|
||||||
type_weights,
|
type_weights,
|
||||||
recent_multiplier,
|
recent_multiplier,
|
||||||
|
dimension_thresholds,
|
||||||
|
confidence_threshold,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def update_profile_mastery_from_evidence(
|
def update_profile_mastery_from_evidence(
|
||||||
profile: LearnerProfile,
|
profile: LearnerProfile,
|
||||||
state: EvidenceState,
|
state: EvidenceState,
|
||||||
mastery_threshold: float,
|
|
||||||
resurfacing_threshold: float,
|
resurfacing_threshold: float,
|
||||||
confidence_threshold: float,
|
|
||||||
) -> None:
|
) -> None:
|
||||||
for concept_key, summary in state.summary_by_concept.items():
|
for concept_key, summary in state.summary_by_concept.items():
|
||||||
if summary.count == 0:
|
if summary.mastered:
|
||||||
continue
|
|
||||||
|
|
||||||
if (
|
|
||||||
summary.weighted_mean_score >= mastery_threshold
|
|
||||||
and summary.confidence >= confidence_threshold
|
|
||||||
):
|
|
||||||
profile.mastered_concepts.add(concept_key)
|
profile.mastered_concepts.add(concept_key)
|
||||||
state.resurfaced_concepts.discard(concept_key)
|
state.resurfaced_concepts.discard(concept_key)
|
||||||
elif (
|
elif concept_key in profile.mastered_concepts and summary.weighted_mean_score < resurfacing_threshold:
|
||||||
concept_key in profile.mastered_concepts
|
|
||||||
and summary.weighted_mean_score < resurfacing_threshold
|
|
||||||
):
|
|
||||||
profile.mastered_concepts.remove(concept_key)
|
profile.mastered_concepts.remove(concept_key)
|
||||||
state.resurfaced_concepts.add(concept_key)
|
state.resurfaced_concepts.add(concept_key)
|
||||||
|
|
||||||
|
|
@ -130,20 +146,25 @@ def update_profile_mastery_from_evidence(
|
||||||
def ingest_evidence_bundle(
|
def ingest_evidence_bundle(
|
||||||
profile: LearnerProfile,
|
profile: LearnerProfile,
|
||||||
items: list[EvidenceItem],
|
items: list[EvidenceItem],
|
||||||
mastery_threshold: float,
|
|
||||||
resurfacing_threshold: float,
|
resurfacing_threshold: float,
|
||||||
confidence_threshold: float,
|
confidence_threshold: float,
|
||||||
type_weights: dict[str, float],
|
type_weights: dict[str, float],
|
||||||
recent_multiplier: float,
|
recent_multiplier: float,
|
||||||
|
dimension_thresholds: dict[str, float],
|
||||||
) -> EvidenceState:
|
) -> EvidenceState:
|
||||||
state = EvidenceState()
|
state = EvidenceState()
|
||||||
for item in items:
|
for item in items:
|
||||||
add_evidence_item(state, item, type_weights, recent_multiplier)
|
add_evidence_item(
|
||||||
|
state,
|
||||||
|
item,
|
||||||
|
type_weights,
|
||||||
|
recent_multiplier,
|
||||||
|
dimension_thresholds,
|
||||||
|
confidence_threshold,
|
||||||
|
)
|
||||||
update_profile_mastery_from_evidence(
|
update_profile_mastery_from_evidence(
|
||||||
profile=profile,
|
profile=profile,
|
||||||
state=state,
|
state=state,
|
||||||
mastery_threshold=mastery_threshold,
|
|
||||||
resurfacing_threshold=resurfacing_threshold,
|
resurfacing_threshold=resurfacing_threshold,
|
||||||
confidence_threshold=confidence_threshold,
|
)
|
||||||
)
|
|
||||||
return state
|
return state
|
||||||
|
|
|
||||||
|
|
@ -26,8 +26,7 @@ def build_merged_learning_graph(results: list[PackValidationResult]) -> MergedLe
|
||||||
|
|
||||||
for pack_name in merged.load_order:
|
for pack_name in merged.load_order:
|
||||||
result = valid[pack_name]
|
result = valid[pack_name]
|
||||||
concepts_file = result.loaded_files["concepts"]
|
for concept in result.loaded_files["concepts"].concepts:
|
||||||
for concept in concepts_file.concepts:
|
|
||||||
key = namespaced_concept(pack_name, concept.id)
|
key = namespaced_concept(pack_name, concept.id)
|
||||||
merged.concept_data[key] = {
|
merged.concept_data[key] = {
|
||||||
"id": concept.id,
|
"id": concept.id,
|
||||||
|
|
@ -40,16 +39,13 @@ def build_merged_learning_graph(results: list[PackValidationResult]) -> MergedLe
|
||||||
|
|
||||||
for pack_name in merged.load_order:
|
for pack_name in merged.load_order:
|
||||||
result = valid[pack_name]
|
result = valid[pack_name]
|
||||||
concepts_file = result.loaded_files["concepts"]
|
for concept in result.loaded_files["concepts"].concepts:
|
||||||
for concept in concepts_file.concepts:
|
|
||||||
concept_key = namespaced_concept(pack_name, concept.id)
|
concept_key = namespaced_concept(pack_name, concept.id)
|
||||||
for prereq in concept.prerequisites:
|
for prereq in concept.prerequisites:
|
||||||
prereq_key = namespaced_concept(pack_name, prereq)
|
prereq_key = namespaced_concept(pack_name, prereq)
|
||||||
if prereq_key in merged.graph:
|
if prereq_key in merged.graph:
|
||||||
merged.graph.add_edge(prereq_key, concept_key)
|
merged.graph.add_edge(prereq_key, concept_key)
|
||||||
|
for project in result.loaded_files["projects"].projects:
|
||||||
projects_file = result.loaded_files["projects"]
|
|
||||||
for project in projects_file.projects:
|
|
||||||
merged.project_catalog.append({
|
merged.project_catalog.append({
|
||||||
"id": f"{pack_name}::{project.id}",
|
"id": f"{pack_name}::{project.id}",
|
||||||
"pack": pack_name,
|
"pack": pack_name,
|
||||||
|
|
@ -58,5 +54,4 @@ def build_merged_learning_graph(results: list[PackValidationResult]) -> MergedLe
|
||||||
"prerequisites": [namespaced_concept(pack_name, p) for p in project.prerequisites],
|
"prerequisites": [namespaced_concept(pack_name, p) for p in project.prerequisites],
|
||||||
"deliverables": list(project.deliverables),
|
"deliverables": list(project.deliverables),
|
||||||
})
|
})
|
||||||
|
|
||||||
return merged
|
return merged
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,6 @@ from .artifact_registry import (
|
||||||
)
|
)
|
||||||
from .config import load_config
|
from .config import load_config
|
||||||
from .evidence_engine import EvidenceItem, ingest_evidence_bundle
|
from .evidence_engine import EvidenceItem, ingest_evidence_bundle
|
||||||
from .evaluation import score_simple_rubric
|
|
||||||
from .learning_graph import build_merged_learning_graph
|
from .learning_graph import build_merged_learning_graph
|
||||||
from .mentor import generate_socratic_prompt
|
from .mentor import generate_socratic_prompt
|
||||||
from .model_provider import ModelProvider
|
from .model_provider import ModelProvider
|
||||||
|
|
@ -20,7 +19,7 @@ from .project_advisor import suggest_capstone
|
||||||
|
|
||||||
|
|
||||||
def build_parser() -> argparse.ArgumentParser:
|
def build_parser() -> argparse.ArgumentParser:
|
||||||
parser = argparse.ArgumentParser(description="Didactopus weighted evidence scaffold")
|
parser = argparse.ArgumentParser(description="Didactopus multi-dimensional mastery scaffold")
|
||||||
parser.add_argument("--domain", required=True)
|
parser.add_argument("--domain", required=True)
|
||||||
parser.add_argument("--goal", required=True)
|
parser.add_argument("--goal", required=True)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
|
@ -68,88 +67,67 @@ def main() -> None:
|
||||||
hide_mastered=True,
|
hide_mastered=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
rubric = score_simple_rubric(0.92, 0.86, 0.82, 0.78)
|
|
||||||
evidence_items = [
|
evidence_items = [
|
||||||
EvidenceItem(
|
|
||||||
concept_key="foundations-statistics::descriptive-statistics",
|
|
||||||
evidence_type="explanation",
|
|
||||||
score=rubric.mean(),
|
|
||||||
is_recent=False,
|
|
||||||
rubric_dimensions={
|
|
||||||
"correctness": rubric.correctness,
|
|
||||||
"clarity": rubric.clarity,
|
|
||||||
"justification": rubric.justification,
|
|
||||||
"transfer": rubric.transfer,
|
|
||||||
},
|
|
||||||
notes="Good explanation.",
|
|
||||||
),
|
|
||||||
EvidenceItem(
|
EvidenceItem(
|
||||||
concept_key="foundations-statistics::descriptive-statistics",
|
concept_key="foundations-statistics::descriptive-statistics",
|
||||||
evidence_type="project",
|
evidence_type="project",
|
||||||
score=0.9,
|
score=0.88,
|
||||||
is_recent=True,
|
is_recent=True,
|
||||||
rubric_dimensions={
|
rubric_dimensions={
|
||||||
"correctness": 0.9,
|
"correctness": 0.9,
|
||||||
"clarity": 0.84,
|
"explanation": 0.83,
|
||||||
"justification": 0.88,
|
"transfer": 0.79,
|
||||||
"transfer": 0.82,
|
"project_execution": 0.88,
|
||||||
|
"critique": 0.74,
|
||||||
},
|
},
|
||||||
notes="Strong project evidence.",
|
notes="Strong integrated performance.",
|
||||||
),
|
),
|
||||||
EvidenceItem(
|
EvidenceItem(
|
||||||
concept_key="bayes-extension::prior",
|
concept_key="bayes-extension::prior",
|
||||||
evidence_type="problem",
|
evidence_type="problem",
|
||||||
score=0.58,
|
score=0.68,
|
||||||
is_recent=True,
|
is_recent=True,
|
||||||
rubric_dimensions={
|
rubric_dimensions={
|
||||||
"correctness": 0.6,
|
"correctness": 0.75,
|
||||||
"clarity": 0.55,
|
"explanation": 0.62,
|
||||||
|
"transfer": 0.55,
|
||||||
|
"critique": 0.58,
|
||||||
},
|
},
|
||||||
notes="Recent weak but informative performance.",
|
notes="Knows some basics, weak transfer and critique.",
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
evidence_state = ingest_evidence_bundle(
|
evidence_state = ingest_evidence_bundle(
|
||||||
profile=profile,
|
profile=profile,
|
||||||
items=evidence_items,
|
items=evidence_items,
|
||||||
mastery_threshold=config.platform.mastery_threshold,
|
|
||||||
resurfacing_threshold=config.platform.resurfacing_threshold,
|
resurfacing_threshold=config.platform.resurfacing_threshold,
|
||||||
confidence_threshold=config.platform.confidence_threshold,
|
confidence_threshold=config.platform.confidence_threshold,
|
||||||
type_weights=config.platform.evidence_weights,
|
type_weights=config.platform.evidence_weights,
|
||||||
recent_multiplier=config.platform.recent_evidence_multiplier,
|
recent_multiplier=config.platform.recent_evidence_multiplier,
|
||||||
|
dimension_thresholds=config.platform.dimension_thresholds,
|
||||||
)
|
)
|
||||||
|
|
||||||
plan = build_adaptive_plan(merged, profile)
|
plan = build_adaptive_plan(merged, profile)
|
||||||
|
|
||||||
print("== Weighted Evidence Summary ==")
|
print("== Multi-Dimensional Evidence Summary ==")
|
||||||
for concept_key, summary in evidence_state.summary_by_concept.items():
|
for concept_key, summary in evidence_state.summary_by_concept.items():
|
||||||
print(
|
print(
|
||||||
f"- {concept_key}: count={summary.count}, "
|
f"- {concept_key}: weighted_mean={summary.weighted_mean_score:.2f}, "
|
||||||
f"weighted_mean={summary.weighted_mean_score:.2f}, "
|
f"confidence={summary.confidence:.2f}, mastered={summary.mastered}"
|
||||||
f"confidence={summary.confidence:.2f}, "
|
|
||||||
f"total_weight={summary.total_weight:.2f}"
|
|
||||||
)
|
)
|
||||||
if summary.dimension_means:
|
if summary.dimension_means:
|
||||||
dims = ", ".join(f"{k}={v:.2f}" for k, v in sorted(summary.dimension_means.items()))
|
dims = ", ".join(f"{k}={v:.2f}" for k, v in sorted(summary.dimension_means.items()))
|
||||||
print(f" * dimensions: {dims}")
|
print(f" * dimensions: {dims}")
|
||||||
|
if summary.weak_dimensions:
|
||||||
|
print(f" * weak dimensions: {', '.join(summary.weak_dimensions)}")
|
||||||
print()
|
print()
|
||||||
|
|
||||||
print("== Mastered Concepts After Weighted Evidence ==")
|
print("== Mastered Concepts ==")
|
||||||
for concept_key in sorted(profile.mastered_concepts):
|
if profile.mastered_concepts:
|
||||||
print(f"- {concept_key}")
|
for concept_key in sorted(profile.mastered_concepts):
|
||||||
print()
|
|
||||||
|
|
||||||
print("== Resurfaced Concepts ==")
|
|
||||||
if evidence_state.resurfaced_concepts:
|
|
||||||
for concept_key in sorted(evidence_state.resurfaced_concepts):
|
|
||||||
print(f"- {concept_key}")
|
print(f"- {concept_key}")
|
||||||
else:
|
else:
|
||||||
print("- none")
|
print("- none yet")
|
||||||
print()
|
|
||||||
|
|
||||||
print("== Adaptive Plan Summary ==")
|
|
||||||
print(f"- roadmap items visible: {len(plan.learner_roadmap)}")
|
|
||||||
print(f"- next-best concepts: {len(plan.next_best_concepts)}")
|
|
||||||
print(f"- eligible projects: {len(plan.eligible_projects)}")
|
|
||||||
print()
|
print()
|
||||||
|
|
||||||
print("== Next Best Concepts ==")
|
print("== Next Best Concepts ==")
|
||||||
|
|
@ -157,7 +135,8 @@ def main() -> None:
|
||||||
print(f"- {concept}")
|
print(f"- {concept}")
|
||||||
print()
|
print()
|
||||||
|
|
||||||
focus_concept = plan.next_best_concepts[0] if plan.next_best_concepts else args.domain
|
focus_concept = "bayes-extension::prior"
|
||||||
print(generate_socratic_prompt(provider, focus_concept))
|
weak_dims = evidence_state.summary_by_concept.get(focus_concept).weak_dimensions if focus_concept in evidence_state.summary_by_concept else []
|
||||||
print(generate_practice_task(provider, focus_concept))
|
print(generate_socratic_prompt(provider, focus_concept, weak_dims))
|
||||||
|
print(generate_practice_task(provider, focus_concept, weak_dims))
|
||||||
print(suggest_capstone(provider, args.domain))
|
print(suggest_capstone(provider, args.domain))
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,10 @@
|
||||||
from .model_provider import ModelProvider
|
from .model_provider import ModelProvider
|
||||||
|
|
||||||
|
|
||||||
def generate_socratic_prompt(provider: ModelProvider, concept: str) -> str:
|
def generate_socratic_prompt(provider: ModelProvider, concept: str, weak_dimensions: list[str] | None = None) -> str:
|
||||||
|
weak_text = ""
|
||||||
|
if weak_dimensions:
|
||||||
|
weak_text = f" Focus especially on weak dimensions: {', '.join(weak_dimensions)}."
|
||||||
return provider.generate(
|
return provider.generate(
|
||||||
f"You are a Socratic mentor. Ask one probing question about '{concept}'."
|
f"You are a Socratic mentor. Ask one probing question about '{concept}'.{weak_text}"
|
||||||
).text
|
).text
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,10 @@
|
||||||
from .model_provider import ModelProvider
|
from .model_provider import ModelProvider
|
||||||
|
|
||||||
|
|
||||||
def generate_practice_task(provider: ModelProvider, concept: str) -> str:
|
def generate_practice_task(provider: ModelProvider, concept: str, weak_dimensions: list[str] | None = None) -> str:
|
||||||
|
weak_text = ""
|
||||||
|
if weak_dimensions:
|
||||||
|
weak_text = f" Target the weak dimensions: {', '.join(weak_dimensions)}."
|
||||||
return provider.generate(
|
return provider.generate(
|
||||||
f"Generate one reasoning-heavy practice task for '{concept}'."
|
f"Generate one reasoning-heavy practice task for '{concept}'.{weak_text}"
|
||||||
).text
|
).text
|
||||||
|
|
|
||||||
|
|
@ -4,5 +4,5 @@ from didactopus.config import load_config
|
||||||
|
|
||||||
def test_load_example_config() -> None:
|
def test_load_example_config() -> None:
|
||||||
config = load_config(Path("configs/config.example.yaml"))
|
config = load_config(Path("configs/config.example.yaml"))
|
||||||
assert config.platform.evidence_weights["project"] == 2.5
|
assert config.platform.dimension_thresholds["transfer"] == 0.7
|
||||||
assert config.platform.recent_evidence_multiplier == 1.35
|
assert config.platform.confidence_threshold == 0.8
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,100 @@
|
||||||
|
from didactopus.adaptive_engine import LearnerProfile
|
||||||
|
from didactopus.evidence_engine import EvidenceItem, ingest_evidence_bundle
|
||||||
|
|
||||||
|
|
||||||
|
DEFAULT_WEIGHTS = {"explanation": 1.0, "problem": 1.5, "project": 2.5, "transfer": 2.0}
|
||||||
|
DEFAULT_THRESHOLDS = {
|
||||||
|
"correctness": 0.8,
|
||||||
|
"explanation": 0.75,
|
||||||
|
"transfer": 0.7,
|
||||||
|
"project_execution": 0.75,
|
||||||
|
"critique": 0.7,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_full_multidim_mastery() -> None:
|
||||||
|
profile = LearnerProfile(learner_id="u1")
|
||||||
|
state = ingest_evidence_bundle(
|
||||||
|
profile,
|
||||||
|
[
|
||||||
|
EvidenceItem(
|
||||||
|
"c1",
|
||||||
|
"project",
|
||||||
|
0.9,
|
||||||
|
is_recent=True,
|
||||||
|
rubric_dimensions={
|
||||||
|
"correctness": 0.88,
|
||||||
|
"explanation": 0.82,
|
||||||
|
"transfer": 0.77,
|
||||||
|
"project_execution": 0.9,
|
||||||
|
"critique": 0.76,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
],
|
||||||
|
resurfacing_threshold=0.55,
|
||||||
|
confidence_threshold=0.75,
|
||||||
|
type_weights=DEFAULT_WEIGHTS,
|
||||||
|
recent_multiplier=1.35,
|
||||||
|
dimension_thresholds=DEFAULT_THRESHOLDS,
|
||||||
|
)
|
||||||
|
assert "c1" in profile.mastered_concepts
|
||||||
|
assert state.summary_by_concept["c1"].mastered is True
|
||||||
|
assert state.summary_by_concept["c1"].weak_dimensions == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_partial_weakness_blocks_mastery() -> None:
|
||||||
|
profile = LearnerProfile(learner_id="u1")
|
||||||
|
state = ingest_evidence_bundle(
|
||||||
|
profile,
|
||||||
|
[
|
||||||
|
EvidenceItem(
|
||||||
|
"c1",
|
||||||
|
"project",
|
||||||
|
0.85,
|
||||||
|
is_recent=True,
|
||||||
|
rubric_dimensions={
|
||||||
|
"correctness": 0.9,
|
||||||
|
"explanation": 0.86,
|
||||||
|
"transfer": 0.52,
|
||||||
|
"project_execution": 0.88,
|
||||||
|
"critique": 0.8,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
],
|
||||||
|
resurfacing_threshold=0.55,
|
||||||
|
confidence_threshold=0.75,
|
||||||
|
type_weights=DEFAULT_WEIGHTS,
|
||||||
|
recent_multiplier=1.35,
|
||||||
|
dimension_thresholds=DEFAULT_THRESHOLDS,
|
||||||
|
)
|
||||||
|
assert "c1" not in profile.mastered_concepts
|
||||||
|
assert state.summary_by_concept["c1"].mastered is False
|
||||||
|
assert "transfer" in state.summary_by_concept["c1"].weak_dimensions
|
||||||
|
|
||||||
|
|
||||||
|
def test_resurfacing_from_multidim_weakness() -> None:
|
||||||
|
profile = LearnerProfile(learner_id="u1", mastered_concepts={"c1"})
|
||||||
|
state = ingest_evidence_bundle(
|
||||||
|
profile,
|
||||||
|
[
|
||||||
|
EvidenceItem(
|
||||||
|
"c1",
|
||||||
|
"problem",
|
||||||
|
0.45,
|
||||||
|
is_recent=True,
|
||||||
|
rubric_dimensions={
|
||||||
|
"correctness": 0.45,
|
||||||
|
"explanation": 0.5,
|
||||||
|
"transfer": 0.4,
|
||||||
|
"critique": 0.42,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
],
|
||||||
|
resurfacing_threshold=0.55,
|
||||||
|
confidence_threshold=0.75,
|
||||||
|
type_weights=DEFAULT_WEIGHTS,
|
||||||
|
recent_multiplier=1.35,
|
||||||
|
dimension_thresholds=DEFAULT_THRESHOLDS,
|
||||||
|
)
|
||||||
|
assert "c1" not in profile.mastered_concepts
|
||||||
|
assert "c1" in state.resurfaced_concepts
|
||||||
Loading…
Reference in New Issue