79 lines
3.7 KiB
Python
79 lines
3.7 KiB
Python
import re
|
|
from .pack_validator import load_pack_artifacts
|
|
|
|
def tokenize(text: str) -> set[str]:
    """Return the distinct lowercase ``a-z0-9`` words contained in *text*.

    The argument is coerced with ``str()`` and lowercased. Every run of
    characters outside ``a-z0-9`` is collapsed to a single space, and the
    result is split on whitespace, so an input without any such character
    yields an empty set.
    """
    normalized = re.sub(r"[^a-z0-9]+", " ", str(text).lower())
    # str.split() with no separator never produces empty strings, so the
    # pieces can go straight into a set.
    return set(normalized.split())
|
|
|
|
def _concept_title_tokens(title: str) -> set[str]:
    """Tokenize a concept title and drop a small set of English stop words.

    Returns whatever ``tokenize(title)`` produces minus the articles,
    conjunctions and prepositions listed below.
    """
    ignored = {"the", "of", "and", "to", "for", "in", "on", "a", "an"}
    return tokenize(title) - ignored
|
|
|
|
def coverage_alignment_for_pack(source_dir) -> dict:
    """Report how well a pack's roadmap, projects and rubrics cover its concepts.

    The pack is loaded with ``load_pack_artifacts(source_dir)``. Coverage is
    judged with simple heuristics: concept ids are matched exactly against
    roadmap stage ``concepts`` and project ``prerequisites``, while concept
    titles, mastery signals and rubric criteria are matched by token overlap
    (see ``tokenize``) against roadmap ``checkpoint`` text and project
    ``deliverables`` text.

    Args:
        source_dir: Location of the pack; passed through unchanged to
            ``load_pack_artifacts``.

    Returns:
        A dict with two keys:

        * ``"warnings"``: list of human-readable warning strings. Per-concept
          warnings come first, then mastery-signal warnings, then the
          pack-wide rubric and project-breadth warnings.
        * ``"summary"``: counters ``coverage_warning_count``,
          ``concept_count``, ``roadmap_covered_count``,
          ``checkpoint_covered_count``, ``project_covered_count`` and
          ``assessed_concept_count``. Every counter other than
          ``coverage_warning_count`` only counts ids that belong to a
          concept defined in the pack.

        If the pack fails to load (``loaded["ok"]`` is falsy) there are no
        warnings and every counter is ``0``.
    """
    loaded = load_pack_artifacts(source_dir)
    if not loaded["ok"]:
        # Keep the summary shape identical to the success path so callers can
        # read any counter without checking for its presence first.
        return {
            "warnings": [],
            "summary": {
                "coverage_warning_count": 0,
                "concept_count": 0,
                "roadmap_covered_count": 0,
                "checkpoint_covered_count": 0,
                "project_covered_count": 0,
                "assessed_concept_count": 0,
            },
        }

    artifacts = loaded["artifacts"]
    # ``or []`` also covers keys that are present but set to None.
    concepts = artifacts["concepts"].get("concepts") or []
    roadmap = artifacts["roadmap"].get("stages") or []
    projects = artifacts["projects"].get("projects") or []
    rubrics = artifacts["rubrics"].get("rubrics") or []

    # Concepts without a truthy id cannot be referenced and are ignored.
    concept_by_id = {c.get("id"): c for c in concepts if c.get("id")}

    roadmap_ids = {
        cid for stage in roadmap for cid in (stage.get("concepts") or [])
    }
    checkpoint_tokens = tokenize(
        " ".join(
            str(item)
            for stage in roadmap
            for item in (stage.get("checkpoint") or [])
        )
    )
    project_ids = {
        cid
        for project in projects
        for cid in (project.get("prerequisites") or [])
    }
    deliverable_tokens = tokenize(
        " ".join(
            str(item)
            for project in projects
            for item in (project.get("deliverables") or [])
        )
    )

    # Tokenize each mastery signal once; the token sets feed both the
    # per-signal check and the rubric overlap check below.
    signal_tokens_by_id = {
        cid: [
            tokenize(signal)
            for signal in (concept.get("mastery_signals") or [])
        ]
        for cid, concept in concept_by_id.items()
    }

    checkpoint_ids = set()
    # Starts empty (not from project_ids) so that prerequisite ids which do
    # not match any concept cannot inflate assessed_concept_count.
    assessed_ids = set()
    warnings = []

    for cid, concept in concept_by_id.items():
        title_tokens = _concept_title_tokens(concept.get("title", ""))
        in_projects = cid in project_ids
        # An empty title can never overlap, so it counts as "not reflected".
        in_checkpoints = not title_tokens.isdisjoint(checkpoint_tokens)

        if cid not in roadmap_ids:
            warnings.append(f"Concept '{cid}' does not appear in any roadmap stage.")
        if in_checkpoints:
            checkpoint_ids.add(cid)
        else:
            warnings.append(f"Concept '{cid}' is not reflected in checkpoint language.")
        if not in_projects:
            warnings.append(f"Concept '{cid}' is not referenced by any project prerequisites.")
        if in_projects or in_checkpoints:
            assessed_ids.add(cid)
        else:
            warnings.append(f"Concept '{cid}' is never covered by checkpoints or projects.")

    for cid, token_sets in signal_tokens_by_id.items():
        for signal_tokens in token_sets:
            if not signal_tokens:
                # Signals with no alphanumeric content cannot be matched.
                continue
            if signal_tokens.isdisjoint(checkpoint_tokens) and signal_tokens.isdisjoint(deliverable_tokens):
                warnings.append(f"Mastery signal for concept '{cid}' is not reflected in checkpoints or project deliverables.")

    rubric_tokens = set()
    for rubric in rubrics:
        for criterion in rubric.get("criteria") or []:
            rubric_tokens |= tokenize(criterion)

    project_and_signal_tokens = set(deliverable_tokens)
    for token_sets in signal_tokens_by_id.values():
        project_and_signal_tokens.update(*token_sets)

    if rubric_tokens and rubric_tokens.isdisjoint(project_and_signal_tokens):
        warnings.append("Rubric criteria show weak lexical overlap with mastery signals and project deliverables.")

    # Breadth heuristic: warn when the number of distinct prerequisite ids is
    # at most a quarter of the concept count (with a floor of one).
    # NOTE(review): this counts every prerequisite id, including ids that do
    # not match a known concept - confirm whether that is intended.
    concept_count = max(1, len(concept_by_id))
    if projects and len(project_ids) <= max(1, concept_count // 4):
        warnings.append("Projects appear to cover only a narrow subset of the concept set.")

    known_ids = set(concept_by_id)
    return {
        "warnings": warnings,
        "summary": {
            "coverage_warning_count": len(warnings),
            "concept_count": len(concept_by_id),
            "roadmap_covered_count": len(roadmap_ids & known_ids),
            "checkpoint_covered_count": len(checkpoint_ids),
            "project_covered_count": len(project_ids & known_ids),
            "assessed_concept_count": len(assessed_ids),
        },
    }
|