152 lines
7.0 KiB
JSON
152 lines
7.0 KiB
JSON
{
|
|
"benchmark": {
|
|
"name": "didactopus-local-model-adequacy",
|
|
"task_family": "graph-grounded-mentor-loop",
|
|
"provider": "stub",
|
|
"hardware_profile": {
|
|
"profile_name": "unspecified-local",
|
|
"cpu": "unknown",
|
|
"ram_gb": null,
|
|
"notes": ""
|
|
}
|
|
},
|
|
"context": {
|
|
"skill_name": "ocw-information-entropy-agent",
|
|
"study_plan_task": "Help a learner connect Shannon entropy, channel capacity, and thermodynamic entropy.",
|
|
"primary_concept": "Independent Reasoning and Careful Comparison",
|
|
"secondary_concept": "Thermodynamics and Entropy",
|
|
"source_language": "en",
|
|
"output_language": "es"
|
|
},
|
|
"role_results": [
|
|
{
|
|
"role": "mentor",
|
|
"provider": "stub",
|
|
"model_name": "local-demo",
|
|
"latency_ms": 0.022,
|
|
"response_preview": "[stubbed-response] [mentor] Concept: Independent Reasoning and Careful Comparison Prerequisites: Course Notes and Reference Texts Supporting lessons",
|
|
"adequacy_score": 0.52,
|
|
"adequacy_rating": "inadequate",
|
|
"grounded_score": 0.65,
|
|
"multilingual_score": 0.0,
|
|
"round_trip": {
|
|
"warnings": [
|
|
"Round-trip translation did not preserve source phrase 'Shannon entropy'.",
|
|
"Round-trip translation did not preserve source phrase 'channel capacity'.",
|
|
"Round-trip translation did not preserve source phrase 'thermodynamic entropy'.",
|
|
"Round-trip translation did not preserve source phrase 'Shannon entropy is not identical to thermodynamic entropy'."
|
|
],
|
|
"summary": {
|
|
"source_phrase_count": 4,
|
|
"round_trip_warning_count": 4,
|
|
"drifted_phrases": [
|
|
"Shannon entropy",
|
|
"channel capacity",
|
|
"thermodynamic entropy",
|
|
"Shannon entropy is not identical to thermodynamic entropy"
|
|
]
|
|
}
|
|
},
|
|
"notes": [
|
|
"Did not ask a focused learner question.",
|
|
"Response does not appear to be in Spanish.",
|
|
"Missing required multilingual term 'shannon-entropy' for language 'es'.",
|
|
"Missing required multilingual term 'channel-capacity' for language 'es'.",
|
|
"Missing required multilingual term 'thermodynamic-entropy' for language 'es'.",
|
|
"Missing required multilingual caveat 'shannon-vs-thermo-not-identical' for language 'es'.",
|
|
"Did not visibly preserve a key grounded concept term in multilingual output.",
|
|
"Round-trip translation did not preserve source phrase 'Shannon entropy'.",
|
|
"Round-trip translation did not preserve source phrase 'channel capacity'.",
|
|
"Round-trip translation did not preserve source phrase 'thermodynamic entropy'.",
|
|
"Round-trip translation did not preserve source phrase 'Shannon entropy is not identical to thermodynamic entropy'."
|
|
]
|
|
},
|
|
{
|
|
"role": "practice",
|
|
"provider": "stub",
|
|
"model_name": "local-demo",
|
|
"latency_ms": 0.007,
|
|
"response_preview": "[stubbed-response] [practice] Concept: Independent Reasoning and Careful Comparison Prerequisites: Course Notes and Reference Texts Supporting lessons",
|
|
"adequacy_score": 0.82,
|
|
"adequacy_rating": "adequate",
|
|
"grounded_score": 1.0,
|
|
"multilingual_score": 0.1,
|
|
"round_trip": {
|
|
"warnings": [
|
|
"Round-trip translation did not preserve source phrase 'Shannon entropy'.",
|
|
"Round-trip translation did not preserve source phrase 'channel capacity'.",
|
|
"Round-trip translation did not preserve source phrase 'thermodynamic entropy'.",
|
|
"Round-trip translation did not preserve source phrase 'Shannon entropy is not identical to thermodynamic entropy'."
|
|
],
|
|
"summary": {
|
|
"source_phrase_count": 4,
|
|
"round_trip_warning_count": 4,
|
|
"drifted_phrases": [
|
|
"Shannon entropy",
|
|
"channel capacity",
|
|
"thermodynamic entropy",
|
|
"Shannon entropy is not identical to thermodynamic entropy"
|
|
]
|
|
}
|
|
},
|
|
"notes": [
|
|
"Response does not appear to be in Spanish.",
|
|
"Missing required multilingual term 'shannon-entropy' for language 'es'.",
|
|
"Missing required multilingual term 'channel-capacity' for language 'es'.",
|
|
"Missing required multilingual term 'thermodynamic-entropy' for language 'es'.",
|
|
"Missing required multilingual caveat 'shannon-vs-thermo-not-identical' for language 'es'.",
|
|
"Round-trip translation did not preserve source phrase 'Shannon entropy'.",
|
|
"Round-trip translation did not preserve source phrase 'channel capacity'.",
|
|
"Round-trip translation did not preserve source phrase 'thermodynamic entropy'.",
|
|
"Round-trip translation did not preserve source phrase 'Shannon entropy is not identical to thermodynamic entropy'."
|
|
]
|
|
},
|
|
{
|
|
"role": "evaluator",
|
|
"provider": "stub",
|
|
"model_name": "local-demo",
|
|
"latency_ms": 0.005,
|
|
"response_preview": "[stubbed-response] [evaluator] Concept: Independent Reasoning and Careful Comparison Prerequisites: Course Notes and Reference Texts Supporting lessons",
|
|
"adequacy_score": 0.3,
|
|
"adequacy_rating": "inadequate",
|
|
"grounded_score": 0.35,
|
|
"multilingual_score": 0.1,
|
|
"round_trip": {
|
|
"warnings": [
|
|
"Round-trip translation did not preserve source phrase 'Shannon entropy'.",
|
|
"Round-trip translation did not preserve source phrase 'channel capacity'.",
|
|
"Round-trip translation did not preserve source phrase 'thermodynamic entropy'.",
|
|
"Round-trip translation did not preserve source phrase 'Shannon entropy is not identical to thermodynamic entropy'."
|
|
],
|
|
"summary": {
|
|
"source_phrase_count": 4,
|
|
"round_trip_warning_count": 4,
|
|
"drifted_phrases": [
|
|
"Shannon entropy",
|
|
"channel capacity",
|
|
"thermodynamic entropy",
|
|
"Shannon entropy is not identical to thermodynamic entropy"
|
|
]
|
|
}
|
|
},
|
|
"notes": [
|
|
"Did not acknowledge learner strengths.",
|
|
"Did not provide a concrete next step.",
|
|
"Response does not appear to be in Spanish.",
|
|
"Missing required multilingual term 'shannon-entropy' for language 'es'.",
|
|
"Missing required multilingual term 'channel-capacity' for language 'es'.",
|
|
"Missing required multilingual term 'thermodynamic-entropy' for language 'es'.",
|
|
"Missing required multilingual caveat 'shannon-vs-thermo-not-identical' for language 'es'.",
|
|
"Round-trip translation did not preserve source phrase 'Shannon entropy'.",
|
|
"Round-trip translation did not preserve source phrase 'channel capacity'.",
|
|
"Round-trip translation did not preserve source phrase 'thermodynamic entropy'.",
|
|
"Round-trip translation did not preserve source phrase 'Shannon entropy is not identical to thermodynamic entropy'."
|
|
]
|
|
}
|
|
],
|
|
"summary": {
|
|
"overall_adequacy_score": 0.547,
|
|
"overall_adequacy_rating": "inadequate",
|
|
"recommended_use": "Not recommended for learner-facing local deployment."
|
|
}
|
|
} |