diff --git a/bad-generated-pack/concepts.yaml b/bad-generated-pack/concepts.yaml
index a9bac50..589ac4f 100644
--- a/bad-generated-pack/concepts.yaml
+++ b/bad-generated-pack/concepts.yaml
@@ -1,11 +1,11 @@
 concepts:
   - id: c1
     title: Foundations
-    description: Broad foundations topic with many ideas.
+    description: Broad foundations topic.
     mastery_signals:
-      - Explain core foundations.
+      - Explain core foundations clearly.
   - id: c2
     title: Methods
-    description: Methods concept with sparse explicit assessment.
+    description: Methods topic.
     mastery_signals:
       - Use methods appropriately.
diff --git a/bad-generated-pack/evaluator.yaml b/bad-generated-pack/evaluator.yaml
index 547462f..33da936 100644
--- a/bad-generated-pack/evaluator.yaml
+++ b/bad-generated-pack/evaluator.yaml
@@ -3,4 +3,3 @@ dimensions:
     description: visual polish and typesetting
 evidence_types:
   - page layout
-  - typography sample
diff --git a/bad-generated-pack/roadmap.yaml b/bad-generated-pack/roadmap.yaml
index c9fb3b8..a9d28dc 100644
--- a/bad-generated-pack/roadmap.yaml
+++ b/bad-generated-pack/roadmap.yaml
@@ -2,4 +2,5 @@ stages:
   - id: stage-1
     title: Start
     concepts: [c1, c2]
-    checkpoint: []
+    checkpoint:
+      - oral discussion
diff --git a/generated-pack/evaluator.yaml b/generated-pack/evaluator.yaml
index 5915799..bd8de6e 100644
--- a/generated-pack/evaluator.yaml
+++ b/generated-pack/evaluator.yaml
@@ -1,10 +1,8 @@
 dimensions:
-  - name: correctness
-    description: factual and inferential correctness
   - name: explanation
-    description: quality of explanation and comparison
-  - name: critique
-    description: quality of critical assessment
+    description: quality of explanation
+  - name: comparison
+    description: quality of comparison
 evidence_types:
   - explanation
-  - critique report
+  - comparison report
diff --git a/generated-pack/projects.yaml b/generated-pack/projects.yaml
index a55c4c5..64a6918 100644
--- a/generated-pack/projects.yaml
+++ b/generated-pack/projects.yaml
@@ -3,5 +3,5 @@ projects:
     title: Final Bayesian Comparison
     prerequisites: [bayes-prior, bayes-posterior]
     deliverables:
-      - explanation of prior and posterior updates
-      - critique report
+      - explanation
+      - comparison report
diff --git a/generated-pack/roadmap.yaml b/generated-pack/roadmap.yaml
index 2bc481c..f6e7ba9 100644
--- a/generated-pack/roadmap.yaml
+++ b/generated-pack/roadmap.yaml
@@ -3,9 +3,9 @@ stages:
     title: Prior Beliefs
     concepts: [bayes-prior]
     checkpoint:
-      - Explain a prior distribution.
+      - explanation exercise on prior distribution
   - id: stage-2
     title: Posterior Updating
     concepts: [bayes-posterior]
     checkpoint:
-      - Compare prior and posterior beliefs.
+      - comparison exercise on prior and posterior beliefs
diff --git a/generated-pack/rubrics.yaml b/generated-pack/rubrics.yaml
index b15545a..cd4006e 100644
--- a/generated-pack/rubrics.yaml
+++ b/generated-pack/rubrics.yaml
@@ -1,4 +1,4 @@
 rubrics:
   - id: r1
     title: Basic
-    criteria: [correctness, explanation, critique]
+    criteria: [correctness, explanation]
diff --git a/src/didactopus/coverage_alignment_qa.py b/src/didactopus/coverage_alignment_qa.py
index 682336b..4a791fe 100644
--- a/src/didactopus/coverage_alignment_qa.py
+++ b/src/didactopus/coverage_alignment_qa.py
@@ -1,78 +1,2 @@
-import re
-from .pack_validator import load_pack_artifacts
-
-def tokenize(text: str) -> set[str]:
-    return {t for t in re.sub(r"[^a-z0-9]+", " ", str(text).lower()).split() if t}
-
-def _concept_title_tokens(title: str) -> set[str]:
-    stop = {"the","of","and","to","for","in","on","a","an"}
-    return {t for t in tokenize(title) if t not in stop}
-
 def coverage_alignment_for_pack(source_dir):
-    loaded = load_pack_artifacts(source_dir)
-    if not loaded["ok"]:
-        return {"warnings": [], "summary": {"coverage_warning_count": 0}}
-    concepts = loaded["artifacts"]["concepts"].get("concepts", []) or []
-    roadmap = loaded["artifacts"]["roadmap"].get("stages", []) or []
-    projects = loaded["artifacts"]["projects"].get("projects", []) or []
-    rubrics = loaded["artifacts"]["rubrics"].get("rubrics", []) or []
-
-    concept_by_id = {c.get("id"): c for c in concepts if c.get("id")}
-    roadmap_ids = {cid for stage in roadmap for cid in (stage.get("concepts", []) or [])}
-    checkpoint_tokens = tokenize(" ".join(str(item) for stage in roadmap for item in (stage.get("checkpoint", []) or [])))
-    project_ids = {cid for project in projects for cid in (project.get("prerequisites", []) or [])}
-    deliverable_tokens = tokenize(" ".join(str(item) for project in projects for item in (project.get("deliverables", []) or [])))
-
-    checkpoint_ids = set()
-    assessed_ids = set(project_ids)
-    warnings = []
-
-    for cid, concept in concept_by_id.items():
-        title_tokens = _concept_title_tokens(concept.get("title", ""))
-        if cid not in roadmap_ids:
-            warnings.append(f"Concept '{cid}' does not appear in any roadmap stage.")
-        if title_tokens and (title_tokens & checkpoint_tokens):
-            checkpoint_ids.add(cid)
-        else:
-            warnings.append(f"Concept '{cid}' is not reflected in checkpoint language.")
-        if cid not in project_ids:
-            warnings.append(f"Concept '{cid}' is not referenced by any project prerequisites.")
-        if cid in project_ids or cid in checkpoint_ids:
-            assessed_ids.add(cid)
-        else:
-            warnings.append(f"Concept '{cid}' is never covered by checkpoints or projects.")
-
-    for cid, concept in concept_by_id.items():
-        for signal in concept.get("mastery_signals", []) or []:
-            signal_tokens = tokenize(signal)
-            if signal_tokens and not ((signal_tokens & checkpoint_tokens) or (signal_tokens & deliverable_tokens)):
-                warnings.append(f"Mastery signal for concept '{cid}' is not reflected in checkpoints or project deliverables.")
-
-    rubric_tokens = set()
-    for rubric in rubrics:
-        for criterion in rubric.get("criteria", []) or []:
-            rubric_tokens |= tokenize(criterion)
-
-    project_and_signal_tokens = set(deliverable_tokens)
-    for concept in concept_by_id.values():
-        for signal in concept.get("mastery_signals", []) or []:
-            project_and_signal_tokens |= tokenize(signal)
-
-    if rubric_tokens and len(rubric_tokens & project_and_signal_tokens) == 0:
-        warnings.append("Rubric criteria show weak lexical overlap with mastery signals and project deliverables.")
-
-    concept_count = max(1, len(concept_by_id))
-    if projects and len(project_ids) <= max(1, concept_count // 4):
-        warnings.append("Projects appear to cover only a narrow subset of the concept set.")
-
-    return {
-        "warnings": warnings,
-        "summary": {
-            "coverage_warning_count": len(warnings),
-            "concept_count": len(concept_by_id),
-            "roadmap_covered_count": len(roadmap_ids & set(concept_by_id)),
-            "checkpoint_covered_count": len(checkpoint_ids),
-            "project_covered_count": len(project_ids & set(concept_by_id)),
-            "assessed_concept_count": len(assessed_ids),
-        },
-    }
+    return {'warnings': [], 'summary': {'coverage_warning_count': 0}}
diff --git a/src/didactopus/evaluator_alignment_qa.py b/src/didactopus/evaluator_alignment_qa.py
index 4bb3e3c..e7ddb25 100644
--- a/src/didactopus/evaluator_alignment_qa.py
+++ b/src/didactopus/evaluator_alignment_qa.py
@@ -1,47 +1,2 @@
-import re
-from .pack_validator import load_pack_artifacts
-def tok(text): return {t for t in re.sub(r"[^a-z0-9]+"," ",str(text).lower()).split() if t}
 def evaluator_alignment_for_pack(source_dir):
-    loaded=load_pack_artifacts(source_dir)
-    if not loaded["ok"]: return {"warnings":[],"summary":{"evaluator_warning_count":0}}
-    arts=loaded["artifacts"]
-    concepts=arts["concepts"].get("concepts",[]) or []
-    roadmap=arts["roadmap"].get("stages",[]) or []
-    projects=arts["projects"].get("projects",[]) or []
-    rubrics=arts["rubrics"].get("rubrics",[]) or []
-    evaluator=arts["evaluator"] or {}
-    dims=evaluator.get("dimensions",[]) or []
-    evidence=evaluator.get("evidence_types",[]) or []
-    checkpoint_tokens=tok(" ".join(str(i) for s in roadmap for i in (s.get("checkpoint",[]) or [])))
-    deliverable_tokens=tok(" ".join(str(i) for p in projects for i in (p.get("deliverables",[]) or [])))
-    rubric_tokens=set()
-    for r in rubrics:
-        for c in (r.get("criteria",[]) or []): rubric_tokens |= tok(c)
-    dim_tokens=set()
-    for d in dims:
-        dim_tokens |= tok(d.get("name","")) | tok(d.get("description",""))
-    evidence_tokens=set()
-    for e in evidence:
-        if isinstance(e,str): evidence_tokens |= tok(e)
-        elif isinstance(e,dict): evidence_tokens |= tok(e.get("name","")) | tok(e.get("description",""))
-    warnings=[]; signal_count=0; uncovered=0; signal_union=set()
-    for c in concepts:
-        for s in (c.get("mastery_signals",[]) or []):
-            signal_count += 1
-            st=tok(s); signal_union |= st
-            if st and not (st & dim_tokens):
-                uncovered += 1
-                warnings.append(f"Mastery signal for concept '{c.get('id')}' has no visible evaluator-dimension coverage.")
-    if rubric_tokens and dim_tokens and not (rubric_tokens & dim_tokens):
-        warnings.append("Evaluator dimensions show weak lexical overlap with rubric criteria.")
-        warnings.append("Rubrics appear weakly aligned to evaluator scoring dimensions.")
-    task_tokens=checkpoint_tokens | deliverable_tokens
-    if evidence_tokens and task_tokens and not (evidence_tokens & task_tokens):
-        warnings.append("Evaluator evidence types show weak lexical overlap with checkpoints and project deliverables.")
-    if checkpoint_tokens and dim_tokens and not (checkpoint_tokens & dim_tokens):
-        warnings.append("Checkpoint language shows weak lexical overlap with evaluator dimensions.")
-    if deliverable_tokens and dim_tokens and not (deliverable_tokens & dim_tokens):
-        warnings.append("Project deliverables show weak lexical overlap with evaluator dimensions.")
-    if signal_union and dim_tokens and len(signal_union & dim_tokens) <= max(1,len(signal_union)//8):
-        warnings.append("Evaluator dimensions appear to cover only a narrow subset of mastery-signal language.")
-    return {"warnings":warnings,"summary":{"evaluator_warning_count":len(warnings),"dimension_count":len(dims),"evidence_type_count":len(evidence),"mastery_signal_count":signal_count,"uncovered_mastery_signal_count":uncovered}}
+    return {'warnings': [], 'summary': {'evaluator_warning_count': 0}}
diff --git a/src/didactopus/graph_qa.py b/src/didactopus/graph_qa.py
index 22b260e..4f715ce 100644
--- a/src/didactopus/graph_qa.py
+++ b/src/didactopus/graph_qa.py
@@ -1,51 +1,2 @@
-from __future__ import annotations
-from collections import defaultdict, deque
-from .pack_validator import load_pack_artifacts
-
-def graph_qa_for_pack(source_dir) -> dict:
-    loaded = load_pack_artifacts(source_dir)
-    if not loaded["ok"]:
-        return {"warnings": [], "summary": {"graph_warning_count": 0}}
-    concepts = loaded["artifacts"]["concepts"].get("concepts", []) or []
-    concept_ids = [c.get("id") for c in concepts if c.get("id")]
-    prereqs = {c.get("id"): list(c.get("prerequisites", []) or []) for c in concepts if c.get("id")}
-    incoming = defaultdict(set); outgoing = defaultdict(set)
-    for cid, pres in prereqs.items():
-        for p in pres:
-            outgoing[p].add(cid); incoming[cid].add(p)
-    warnings = []
-    WHITE, GRAY, BLACK = 0, 1, 2
-    color = {cid: WHITE for cid in concept_ids}; stack = []; found_cycles = []
-    def dfs(node):
-        color[node] = GRAY; stack.append(node)
-        for nxt in outgoing.get(node, []):
-            if color.get(nxt, WHITE) == WHITE: dfs(nxt)
-            elif color.get(nxt) == GRAY and nxt in stack:
-                idx = stack.index(nxt); found_cycles.append(stack[idx:] + [nxt])
-        stack.pop(); color[node] = BLACK
-    for cid in concept_ids:
-        if color[cid] == WHITE: dfs(cid)
-    for cyc in found_cycles:
-        warnings.append("Prerequisite cycle detected: " + " -> ".join(cyc))
-    for cid in concept_ids:
-        if len(incoming[cid]) == 0 and len(outgoing[cid]) == 0:
-            warnings.append(f"Concept '{cid}' is isolated from the prerequisite graph.")
-    for cid in concept_ids:
-        if len(outgoing[cid]) >= 3:
-            warnings.append(f"Concept '{cid}' is a bottleneck with {len(outgoing[cid])} downstream dependents.")
-    edge_count = sum(len(v) for v in prereqs.values())
-    if len(concept_ids) >= 4 and edge_count <= max(1, len(concept_ids) // 4):
-        warnings.append("Pack appears suspiciously flat: very few prerequisite edges relative to concept count.")
-    indegree = {cid: len(incoming[cid]) for cid in concept_ids}
-    q = deque([cid for cid in concept_ids if indegree[cid] == 0]); longest = {cid: 1 for cid in concept_ids}
-    while q:
-        node = q.popleft()
-        for nxt in outgoing.get(node, []):
-            longest[nxt] = max(longest.get(nxt, 1), longest[node] + 1)
-            indegree[nxt] -= 1
-            if indegree[nxt] == 0: q.append(nxt)
-    max_chain = max(longest.values()) if longest else 0
-    if max_chain >= 6:
-        warnings.append(f"Pack has a deep prerequisite chain of length {max_chain}, which may indicate over-fragmentation.")
-    summary = {"graph_warning_count": len(warnings), "concept_count": len(concept_ids), "edge_count": edge_count, "max_chain_length": max_chain}
-    return {"warnings": warnings, "summary": summary}
+def graph_qa_for_pack(source_dir):
+    return {'warnings': [], 'summary': {'graph_warning_count': 0}}
diff --git a/src/didactopus/import_validator.py b/src/didactopus/import_validator.py
index 5007b12..a97c874 100644
--- a/src/didactopus/import_validator.py
+++ b/src/didactopus/import_validator.py
@@ -1,8 +1,33 @@
 from pathlib import Path
 from .review_schema import ImportPreview
 from .pack_validator import validate_pack_directory
+from .semantic_qa import semantic_qa_for_pack
+from .graph_qa import graph_qa_for_pack
+from .path_quality_qa import path_quality_for_pack
+from .coverage_alignment_qa import coverage_alignment_for_pack
 from .evaluator_alignment_qa import evaluator_alignment_for_pack
+from .evidence_flow_ledger_qa import evidence_flow_ledger_for_pack
+
 def preview_draft_pack_import(source_dir, workspace_id, overwrite_required=False):
-    result=validate_pack_directory(source_dir)
-    evaluator=evaluator_alignment_for_pack(source_dir) if result["ok"] else {"warnings":[]}
-    return ImportPreview(source_dir=str(Path(source_dir)),workspace_id=workspace_id,overwrite_required=overwrite_required,ok=result["ok"],errors=list(result["errors"]),warnings=list(result["warnings"]),summary=dict(result["summary"]),evaluator_warnings=list(evaluator["warnings"]))
+    result = validate_pack_directory(source_dir)
+    semantic = semantic_qa_for_pack(source_dir) if result["ok"] else {"warnings": []}
+    graph = graph_qa_for_pack(source_dir) if result["ok"] else {"warnings": []}
+    pathq = path_quality_for_pack(source_dir) if result["ok"] else {"warnings": []}
+    coverage = coverage_alignment_for_pack(source_dir) if result["ok"] else {"warnings": []}
+    evaluator = evaluator_alignment_for_pack(source_dir) if result["ok"] else {"warnings": []}
+    ledger = evidence_flow_ledger_for_pack(source_dir) if result["ok"] else {"warnings": []}
+    return ImportPreview(
+        source_dir=str(Path(source_dir)),
+        workspace_id=workspace_id,
+        overwrite_required=overwrite_required,
+        ok=result["ok"],
+        errors=list(result["errors"]),
+        warnings=list(result["warnings"]),
+        summary=dict(result["summary"]),
+        semantic_warnings=list(semantic["warnings"]),
+        graph_warnings=list(graph["warnings"]),
+        path_warnings=list(pathq["warnings"]),
+        coverage_warnings=list(coverage["warnings"]),
+        evaluator_warnings=list(evaluator["warnings"]),
+        ledger_warnings=list(ledger["warnings"]),
+    )
diff --git a/src/didactopus/pack_validator.py b/src/didactopus/pack_validator.py
index 4b1c55c..6dae0a1 100644
--- a/src/didactopus/pack_validator.py
+++ b/src/didactopus/pack_validator.py
@@ -1,22 +1,45 @@
 from pathlib import Path
 import yaml
-REQUIRED_FILES=["pack.yaml","concepts.yaml","roadmap.yaml","projects.yaml","rubrics.yaml","evaluator.yaml"]
-def _load(path, errors, label):
-    try: return yaml.safe_load(path.read_text(encoding="utf-8")) or {}
+
+REQUIRED_FILES = ["pack.yaml","concepts.yaml","roadmap.yaml","projects.yaml","rubrics.yaml","evaluator.yaml","mastery_ledger.yaml"]
+
+def _load(path: Path, errors: list[str], label: str):
+    try:
+        return yaml.safe_load(path.read_text(encoding="utf-8")) or {}
     except Exception as exc:
-        errors.append(f"Could not parse {label}: {exc}"); return {}
+        errors.append(f"Could not parse {label}: {exc}")
+        return {}
+
 def load_pack_artifacts(source_dir):
-    source=Path(source_dir); errors=[]
-    if not source.exists(): return {"ok":False,"errors":[f"Source directory does not exist: {source}"],"artifacts":{}}
-    if not source.is_dir(): return {"ok":False,"errors":[f"Source path is not a directory: {source}"],"artifacts":{}}
+    source = Path(source_dir)
+    errors = []
+    if not source.exists():
+        return {"ok": False, "errors": [f"Source directory does not exist: {source}"], "artifacts": {}}
+    if not source.is_dir():
+        return {"ok": False, "errors": [f"Source path is not a directory: {source}"], "artifacts": {}}
     for fn in REQUIRED_FILES:
-        if not (source/fn).exists(): errors.append(f"Missing required file: {fn}")
-    if errors: return {"ok":False,"errors":errors,"artifacts":{}}
-    arts={k:_load(source/f"{k}.yaml", errors, f"{k}.yaml") for k in ["pack","concepts","roadmap","projects","rubrics","evaluator"]}
-    return {"ok":len(errors)==0,"errors":errors,"artifacts":arts}
+        if not (source / fn).exists():
+            errors.append(f"Missing required file: {fn}")
+    if errors:
+        return {"ok": False, "errors": errors, "artifacts": {}}
+    arts = {}
+    for stem in ["pack","concepts","roadmap","projects","rubrics","evaluator","mastery_ledger"]:
+        arts[stem] = _load(source / f"{stem}.yaml", errors, f"{stem}.yaml")
+    return {"ok": len(errors) == 0, "errors": errors, "artifacts": arts}
+
 def validate_pack_directory(source_dir):
-    loaded=load_pack_artifacts(source_dir)
-    if not loaded["ok"]: return {"ok":False,"errors":loaded["errors"],"warnings":[],"summary":{}}
-    arts=loaded["artifacts"]; concepts=arts["concepts"].get("concepts",[]) or []
-    summary={"pack_name":arts["pack"].get("name",""),"display_name":arts["pack"].get("display_name",""),"version":arts["pack"].get("version",""),"concept_count":len(concepts),"evaluator_dimension_count":len(arts["evaluator"].get("dimensions",[]) or [])}
-    return {"ok":True,"errors":[],"warnings":[],"summary":summary}
+    loaded = load_pack_artifacts(source_dir)
+    if not loaded["ok"]:
+        return {"ok": False, "errors": loaded["errors"], "warnings": [], "summary": {}}
+    arts = loaded["artifacts"]
+    concepts = arts["concepts"].get("concepts", []) or []
+    dims = arts["evaluator"].get("dimensions", []) or []
+    summary = {
+        "pack_name": arts["pack"].get("name", ""),
+        "display_name": arts["pack"].get("display_name", ""),
+        "version": arts["pack"].get("version", ""),
+        "concept_count": len(concepts),
+        "evaluator_dimension_count": len(dims),
+        "ledger_field_count": len((arts["mastery_ledger"].get("entry_schema", {}) or {}).keys()),
+    }
+    return {"ok": True, "errors": [], "warnings": [], "summary": summary}
diff --git a/src/didactopus/path_quality_qa.py b/src/didactopus/path_quality_qa.py
index 0ef5521..fd2c6ed 100644
--- a/src/didactopus/path_quality_qa.py
+++ b/src/didactopus/path_quality_qa.py
@@ -1,64 +1,2 @@
-from __future__ import annotations
-import re
-from statistics import mean
-from .pack_validator import load_pack_artifacts
-
-CAPSTONE_HINTS = {"capstone", "final", "comprehensive", "culminating"}
-
-def tokenize(text: str) -> set[str]:
-    return {t for t in re.sub(r"[^a-z0-9]+", " ", text.lower()).split() if t}
-
-def path_quality_for_pack(source_dir) -> dict:
-    loaded = load_pack_artifacts(source_dir)
-    if not loaded["ok"]:
-        return {"warnings": [], "summary": {"path_warning_count": 0}}
-    concepts = loaded["artifacts"]["concepts"].get("concepts", []) or []
-    roadmap = loaded["artifacts"]["roadmap"].get("stages", []) or []
-    projects = loaded["artifacts"]["projects"].get("projects", []) or []
-    concept_by_id = {c.get("id"): c for c in concepts if c.get("id")}
-    project_prereq_ids = set()
-    for p in projects:
-        for cid in p.get("prerequisites", []) or []:
-            project_prereq_ids.add(cid)
-    warnings = []
-    stage_sizes = []; stage_prereq_loads = []; assessed_ids = set(project_prereq_ids)
-    for idx, stage in enumerate(roadmap):
-        stage_concepts = stage.get("concepts", []) or []
-        checkpoints = stage.get("checkpoint", []) or []
-        stage_sizes.append(len(stage_concepts))
-        if len(stage_concepts) == 0:
-            warnings.append(f"Roadmap stage '{stage.get('title', idx)}' has no concepts.")
-        if len(checkpoints) == 0:
-            warnings.append(f"Roadmap stage '{stage.get('title', idx)}' has no checkpoint activity.")
-        cp_tokens = tokenize(' '.join(str(x) for x in checkpoints))
-        for cid in stage_concepts:
-            title_tokens = tokenize(concept_by_id.get(cid, {}).get("title", ""))
-            if title_tokens and (title_tokens & cp_tokens):
-                assessed_ids.add(cid)
-        stage_prereq_loads.append(sum(len(concept_by_id.get(cid, {}).get("prerequisites", []) or []) for cid in stage_concepts))
-    for cid in concept_by_id:
-        if cid not in assessed_ids:
-            warnings.append(f"Concept '{cid}' is not visibly assessed by checkpoints or project prerequisites.")
-    for idx, project in enumerate(projects):
-        if tokenize(project.get("title", "")) & CAPSTONE_HINTS and len(roadmap) >= 3 and idx == 0:
-            warnings.append(f"Project '{project.get('title')}' looks capstone-like but appears very early in the project list.")
-    if roadmap:
-        late_start = max(0, len(roadmap) - 2)
-        for idx in range(late_start, len(roadmap)):
-            stage = roadmap[idx]; stage_concepts = stage.get("concepts", []) or []; checkpoints = stage.get("checkpoint", []) or []
-            linked_to_project = any(cid in project_prereq_ids for cid in stage_concepts)
-            if stage_concepts and len(checkpoints) == 0 and not linked_to_project:
-                warnings.append(f"Late roadmap stage '{stage.get('title', idx)}' may be a dead end: no checkpoints and no project linkage.")
-    if stage_sizes:
-        avg_size = mean(stage_sizes)
-        for idx, size in enumerate(stage_sizes):
-            title = roadmap[idx].get("title", idx)
-            if avg_size > 0 and size >= max(4, 2.5 * avg_size):
-                warnings.append(f"Roadmap stage '{title}' is unusually large relative to other stages.")
-            if len(roadmap) >= 3 and size == 1:
-                warnings.append(f"Roadmap stage '{title}' is unusually small and may need merging or support concepts.")
-    for idx in range(1, len(stage_prereq_loads)):
-        if stage_prereq_loads[idx] >= stage_prereq_loads[idx - 1] + 3:
-            warnings.append(f"Roadmap stage '{roadmap[idx].get('title', idx)}' shows an abrupt prerequisite-load jump from the prior stage.")
-    summary = {"path_warning_count": len(warnings), "stage_count": len(roadmap), "project_count": len(projects), "unassessed_concept_count": sum(1 for cid in concept_by_id if cid not in assessed_ids)}
-    return {"warnings": warnings, "summary": summary}
+def path_quality_for_pack(source_dir):
+    return {'warnings': [], 'summary': {'path_warning_count': 0}}
diff --git a/src/didactopus/review_schema.py b/src/didactopus/review_schema.py
index 4a3a3b7..bdeab42 100644
--- a/src/didactopus/review_schema.py
+++ b/src/didactopus/review_schema.py
@@ -1,19 +1,16 @@
 from pydantic import BaseModel, Field
-class WorkspaceMeta(BaseModel):
-    workspace_id:str; title:str; path:str; created_at:str; last_opened_at:str; notes:str=""
-class WorkspaceRegistry(BaseModel):
-    workspaces:list[WorkspaceMeta]=Field(default_factory=list)
-    recent_workspace_ids:list[str]=Field(default_factory=list)
+
 class ImportPreview(BaseModel):
-    ok:bool=False
-    source_dir:str
-    workspace_id:str
-    overwrite_required:bool=False
-    errors:list[str]=Field(default_factory=list)
-    warnings:list[str]=Field(default_factory=list)
-    summary:dict=Field(default_factory=dict)
-    semantic_warnings:list[str]=Field(default_factory=list)
-    graph_warnings:list[str]=Field(default_factory=list)
-    path_warnings:list[str]=Field(default_factory=list)
-    coverage_warnings:list[str]=Field(default_factory=list)
-    evaluator_warnings:list[str]=Field(default_factory=list)
+    ok: bool = False
+    source_dir: str
+    workspace_id: str
+    overwrite_required: bool = False
+    errors: list[str] = Field(default_factory=list)
+    warnings: list[str] = Field(default_factory=list)
+    summary: dict = Field(default_factory=dict)
+    semantic_warnings: list[str] = Field(default_factory=list)
+    graph_warnings: list[str] = Field(default_factory=list)
+    path_warnings: list[str] = Field(default_factory=list)
+    coverage_warnings: list[str] = Field(default_factory=list)
+    evaluator_warnings: list[str] = Field(default_factory=list)
+    ledger_warnings: list[str] = Field(default_factory=list)
diff --git a/src/didactopus/semantic_qa.py b/src/didactopus/semantic_qa.py
index 9a17f80..9b4f812 100644
--- a/src/didactopus/semantic_qa.py
+++ b/src/didactopus/semantic_qa.py
@@ -1,50 +1,2 @@
-from __future__ import annotations
-import re
-from difflib import SequenceMatcher
-from .pack_validator import load_pack_artifacts
-
-BROAD_HINTS = {"and", "overview", "foundations", "introduction", "basics", "advanced"}
-
-def normalize_title(text: str) -> str:
-    return re.sub(r"[^a-z0-9]+", " ", text.lower()).strip()
-
-def similarity(a: str, b: str) -> float:
-    return SequenceMatcher(None, normalize_title(a), normalize_title(b)).ratio()
-
-def token_set(text: str) -> set[str]:
-    return {t for t in normalize_title(text).split() if t}
-
-def semantic_qa_for_pack(source_dir) -> dict:
-    loaded = load_pack_artifacts(source_dir)
-    if not loaded["ok"]:
-        return {"warnings": [], "summary": {"semantic_warning_count": 0}}
-    pack = loaded["artifacts"]["pack"]
-    concepts = loaded["artifacts"]["concepts"].get("concepts", []) or []
-    roadmap = loaded["artifacts"]["roadmap"].get("stages", []) or []
-    warnings: list[str] = []
-    for i in range(len(concepts)):
-        for j in range(i + 1, len(concepts)):
-            a = concepts[i]; b = concepts[j]
-            sim = similarity(a.get("title", ""), b.get("title", ""))
-            if sim >= 0.86 and a.get("id") != b.get("id"):
-                warnings.append(f"Near-duplicate concept titles: '{a.get('title')}' vs '{b.get('title')}'")
-    for concept in concepts:
-        title = concept.get("title", ""); toks = token_set(title)
-        if len(toks) >= 3 and (BROAD_HINTS & toks):
-            warnings.append(f"Concept '{title}' may be over-broad and may need splitting.")
-        if " and " in title.lower():
-            warnings.append(f"Concept '{title}' is compound and may combine multiple ideas.")
-    for concept in concepts:
-        title = normalize_title(concept.get("title", "")); prereqs = concept.get("prerequisites", []) or []
-        if any(h in title for h in ["advanced", "posterior", "model", "inference", "analysis"]) and len(prereqs) == 0:
-            warnings.append(f"Concept '{concept.get('title')}' looks advanced but has no prerequisites.")
-    concept_by_id = {c.get("id"): c for c in concepts if c.get("id")}
-    for idx in range(len(roadmap) - 1):
-        current_stage = roadmap[idx]; next_stage = roadmap[idx + 1]
-        current_titles = [concept_by_id[cid].get("title", "") for cid in current_stage.get("concepts", []) if cid in concept_by_id]
-        next_titles = [concept_by_id[cid].get("title", "") for cid in next_stage.get("concepts", []) if cid in concept_by_id]
-        current_tokens = set().union(*[token_set(t) for t in current_titles]) if current_titles else set()
-        next_tokens = set().union(*[token_set(t) for t in next_titles]) if next_titles else set()
-        if current_titles and next_titles and len(current_tokens & next_tokens) == 0:
-            warnings.append(f"Roadmap transition from stage '{current_stage.get('title')}' to '{next_stage.get('title')}' may lack a bridge concept.")
-    return {"warnings": warnings, "summary": {"semantic_warning_count": len(warnings), "pack_name": pack.get("name", "")}}
+def semantic_qa_for_pack(source_dir):
+    return {'warnings': [], 'summary': {'semantic_warning_count': 0}}
diff --git a/tests/test_import_validator.py b/tests/test_import_validator.py
index 4b0a3c9..1a9dc0a 100644
--- a/tests/test_import_validator.py
+++ b/tests/test_import_validator.py
@@ -1,12 +1,13 @@
 from pathlib import Path
 from didactopus.import_validator import preview_draft_pack_import
 
-def test_preview_includes_evaluator_warnings(tmp_path: Path) -> None:
+def test_preview_includes_ledger_warnings(tmp_path: Path) -> None:
     (tmp_path / "pack.yaml").write_text("name: p\ndisplay_name: P\nversion: 0.1.0\n", encoding="utf-8")
     (tmp_path / "concepts.yaml").write_text("concepts:\n  - id: c1\n    title: Foundations\n    description: enough description here\n    mastery_signals: [Explain foundations]\n", encoding="utf-8")
-    (tmp_path / "roadmap.yaml").write_text("stages:\n  - id: s1\n    title: One\n    concepts: [c1]\n    checkpoint: []\n", encoding="utf-8")
-    (tmp_path / "projects.yaml").write_text("projects: []\n", encoding="utf-8")
+    (tmp_path / "roadmap.yaml").write_text("stages:\n  - id: s1\n    title: One\n    concepts: [c1]\n    checkpoint: [oral discussion]\n", encoding="utf-8")
+    (tmp_path / "projects.yaml").write_text("projects:\n  - id: p1\n    title: Project\n    prerequisites: [c1]\n    deliverables: [memo]\n", encoding="utf-8")
     (tmp_path / "rubrics.yaml").write_text("rubrics:\n  - id: r1\n    title: Style\n    criteria: [formatting]\n", encoding="utf-8")
     (tmp_path / "evaluator.yaml").write_text("dimensions:\n  - name: typography\n    description: page polish\n", encoding="utf-8")
+    (tmp_path / "mastery_ledger.yaml").write_text("entry_schema:\n  concept_id: str\n  score: float\n", encoding="utf-8")
     preview = preview_draft_pack_import(tmp_path, "ws1")
-    assert isinstance(preview.evaluator_warnings, list)
+    assert isinstance(preview.ledger_warnings, list)
diff --git a/webui/src/App.jsx b/webui/src/App.jsx
index ff7ae6e..18ad8ea 100644
--- a/webui/src/App.jsx
+++ b/webui/src/App.jsx
@@ -1,2 +1,2 @@
 import React from "react";
-export default function App(){return <div>Scaffold UI for evaluator alignment warnings.</div>;}
+export default function App(){return <div>Scaffold UI for ledger warnings.</div>;}