# Source: Didactopus/src/didactopus/pack_validator.py (repository listing: 150 lines, 6.0 KiB, Python)


from __future__ import annotations
from pathlib import Path
import yaml
# Artifact files every pack directory must contain; load_pack_artifacts
# reports each missing one and stops before parsing anything.
REQUIRED_FILES = [
    "pack.yaml",
    "concepts.yaml",
    "roadmap.yaml",
    "projects.yaml",
    "rubrics.yaml",
]

# Artifact files that are parsed only when present, keyed by the name under
# which their contents are exposed in the loaded artifacts.
OPTIONAL_FILES = dict(
    evaluator="evaluator.yaml",
    mastery_ledger="mastery_ledger.yaml",
)
def _safe_load_yaml(path: Path, errors: list[str], label: str):
try:
return yaml.safe_load(path.read_text(encoding="utf-8")) or {}
except Exception as exc:
errors.append(f"Could not parse {label}: {exc}")
return {}
def load_pack_artifacts(source_dir: str | Path) -> dict:
    """Load the YAML artifacts of the pack stored in *source_dir*.

    Args:
        source_dir: Directory expected to contain the pack's YAML files.

    Returns:
        A dict with the keys ``ok``, ``errors``, ``warnings``, ``summary``
        and ``artifacts``. ``warnings`` and ``summary`` are always empty
        here. ``artifacts`` is empty when the directory is missing, is not a
        directory, or lacks a required file; otherwise it holds one entry per
        required artifact plus one per optional artifact (``{}`` when the
        optional file is absent). ``ok`` is False whenever ``errors`` is
        non-empty.
    """
    root = Path(source_dir)

    def _failure(messages: list[str]) -> dict:
        # Shared shape of every early-exit result.
        return {"ok": False, "errors": messages, "warnings": [], "summary": {}, "artifacts": {}}

    if not root.exists():
        return _failure([f"Source directory does not exist: {root}"])
    if not root.is_dir():
        return _failure([f"Source path is not a directory: {root}"])

    problems: list[str] = [
        f"Missing required file: {name}"
        for name in REQUIRED_FILES
        if not (root / name).exists()
    ]
    if problems:
        return _failure(problems)

    # Required artifacts are parsed in a fixed order so that parse errors are
    # reported in that same order.
    artifacts = {}
    for key in ("pack", "concepts", "roadmap", "projects", "rubrics"):
        file_name = f"{key}.yaml"
        artifacts[key] = _safe_load_yaml(root / file_name, problems, file_name)

    # Optional artifacts default to an empty mapping when their file is absent.
    for key, file_name in OPTIONAL_FILES.items():
        candidate = root / file_name
        if candidate.exists():
            artifacts[key] = _safe_load_yaml(candidate, problems, file_name)
        else:
            artifacts[key] = {}

    return {
        "ok": not problems,
        "errors": problems,
        "warnings": [],
        "summary": {},
        "artifacts": artifacts,
    }
def _is_hashable(value) -> bool:
    """Return True when *value* can be stored in a set."""
    try:
        hash(value)
    except TypeError:
        return False
    return True


def _mapping_or_empty(data, label: str, errors: list[str]) -> dict:
    """Return *data* if it is a mapping; otherwise record an error and return ``{}``."""
    if isinstance(data, dict):
        return data
    errors.append(f"{label} top-level content is not a mapping.")
    return {}


def _list_field(data: dict, key: str, label: str, errors: list[str]) -> list:
    """Return ``data[key]`` (default ``[]``) if it is a list; otherwise record an error and return ``[]``."""
    value = data.get(key, [])
    if isinstance(value, list):
        return value
    errors.append(f"{label} top-level '{key}' is not a list.")
    return []


def _mapping_entries(entries: list, kind: str, errors: list[str]):
    """Yield ``(index, entry)`` for mapping entries, recording an error for every other entry."""
    for idx, entry in enumerate(entries):
        if isinstance(entry, dict):
            yield idx, entry
        else:
            errors.append(f"{kind} at index {idx} is not a mapping.")


def _reference_list(entry: dict, key: str, owner: str, errors: list[str]) -> list:
    """Return the id list stored under *key*, treating a missing or falsy value as empty."""
    refs = entry.get(key, []) or []
    if isinstance(refs, list):
        return refs
    # A scalar here would otherwise be iterated character by character or raise.
    errors.append(f"{owner} '{key}' is not a list.")
    return []


def validate_pack_directory(source_dir: str | Path) -> dict:
    """Validate the pack stored in *source_dir* and report what was found.

    The pack is loaded with ``load_pack_artifacts``; if loading fails, its
    errors, warnings and summary are returned unchanged with ``ok`` False.
    Otherwise the artifacts are checked for missing pack metadata, malformed
    or duplicate concepts, roadmap/project references to unknown concept
    ids, and malformed rubrics. Malformed content is reported as an error
    instead of raising.

    Args:
        source_dir: Directory containing the pack's YAML files.

    Returns:
        A dict with ``ok`` (True when no errors were recorded), ``errors``,
        ``warnings`` and ``summary``. After a successful load, ``summary``
        holds the pack name, display name and version, the number of
        concepts, roadmap stages, projects and rubrics, and the error and
        warning counts.
    """
    loaded = load_pack_artifacts(source_dir)
    errors = list(loaded["errors"])
    warnings = list(loaded["warnings"])
    if not loaded["ok"]:
        return {
            "ok": False,
            "errors": errors,
            "warnings": warnings,
            "summary": dict(loaded["summary"]),
        }

    artifacts = loaded["artifacts"]
    # A YAML document may legally be a list or scalar; normalise so the
    # ``.get`` calls below cannot raise AttributeError.
    pack_data = _mapping_or_empty(artifacts["pack"], "pack.yaml", errors)
    concepts_data = _mapping_or_empty(artifacts["concepts"], "concepts.yaml", errors)
    roadmap_data = _mapping_or_empty(artifacts["roadmap"], "roadmap.yaml", errors)
    projects_data = _mapping_or_empty(artifacts["projects"], "projects.yaml", errors)
    rubrics_data = _mapping_or_empty(artifacts["rubrics"], "rubrics.yaml", errors)

    for field in ("name", "display_name", "version"):
        if field not in pack_data:
            warnings.append(f"pack.yaml has no '{field}' field.")

    concepts = _list_field(concepts_data, "concepts", "concepts.yaml", errors)
    roadmap_stages = _list_field(roadmap_data, "stages", "roadmap.yaml", errors)
    projects = _list_field(projects_data, "projects", "projects.yaml", errors)
    rubrics = _list_field(rubrics_data, "rubrics", "rubrics.yaml", errors)

    # --- concepts -----------------------------------------------------------
    concept_ids = []
    for idx, concept in _mapping_entries(concepts, "Concept", errors):
        cid = concept.get("id", "")
        label = cid or idx
        if not cid:
            errors.append(f"Concept at index {idx} has no id.")
        elif not _is_hashable(cid):
            # Ids are collected in a set below; a list/dict id cannot be one.
            errors.append(f"Concept at index {idx} has an invalid id: {cid!r}")
            label = idx
        else:
            concept_ids.append(cid)
        if not concept.get("title"):
            warnings.append(f"Concept '{label}' has no title.")
        description = str(concept.get("description", "") or "")
        if len(description.strip()) < 12:
            warnings.append(f"Concept '{label}' has a very thin description.")

    seen = set()
    duplicates = set()
    for cid in concept_ids:
        if cid in seen:
            duplicates.add(cid)
        seen.add(cid)
    try:
        ordered_duplicates = sorted(duplicates)
    except TypeError:
        # Ids of mixed types (e.g. int and str) are not mutually orderable.
        ordered_duplicates = sorted(duplicates, key=repr)
    for cid in ordered_duplicates:
        errors.append(f"Duplicate concept id: {cid}")

    # --- references to concepts ---------------------------------------------
    for idx, stage in _mapping_entries(roadmap_stages, "Roadmap stage", errors):
        owner = f"roadmap.yaml stage at index {idx}"
        for cid in _reference_list(stage, "concepts", owner, errors):
            # An unhashable reference can never match a known id.
            if not _is_hashable(cid) or cid not in seen:
                errors.append(f"roadmap.yaml references missing concept id: {cid}")

    for idx, project in _mapping_entries(projects, "Project", errors):
        if not project.get("id"):
            warnings.append("A project entry has no id.")
        owner = f"projects.yaml project at index {idx}"
        for cid in _reference_list(project, "prerequisites", owner, errors):
            if not _is_hashable(cid) or cid not in seen:
                errors.append(
                    f"projects.yaml references missing prerequisite concept id: {cid}"
                )

    # --- rubrics ------------------------------------------------------------
    for idx, rubric in _mapping_entries(rubrics, "Rubric", errors):
        rubric_id = rubric.get("id")
        if not rubric_id:
            warnings.append(f"Rubric at index {idx} has no id.")
        # Fall back to the index for a missing *or* empty/null id, matching
        # how concepts are labelled.
        label = rubric_id or idx
        criteria = rubric.get("criteria", [])
        if criteria is None:
            warnings.append(f"Rubric '{label}' has null criteria.")
        elif not isinstance(criteria, list):
            errors.append(f"Rubric '{label}' criteria is not a list.")
        elif not criteria:
            warnings.append(f"Rubric '{label}' has empty criteria.")

    summary = {
        "pack_name": pack_data.get("name", ""),
        "display_name": pack_data.get("display_name", ""),
        "version": pack_data.get("version", ""),
        "concept_count": len(concepts),
        "roadmap_stage_count": len(roadmap_stages),
        "project_count": len(projects),
        "rubric_count": len(rubrics),
        "error_count": len(errors),
        "warning_count": len(warnings),
    }
    return {"ok": not errors, "errors": errors, "warnings": warnings, "summary": summary}