# Didactopus/tests/test_multilingual_qa.py
#
# File-viewer metadata carried over with the paste: 62 lines, 2.4 KiB, Python.

from pathlib import Path
from didactopus.multilingual_qa import (
load_multilingual_qa_spec,
multilingual_qa_for_pack,
multilingual_qa_for_text,
round_trip_source_phrases,
round_trip_warning_for_phrases,
)
def test_load_multilingual_qa_spec_reads_ocw_pack() -> None:
    """Load the OCW information-entropy pack spec and check its languages.

    The loaded spec must have ``"en"`` as its ``source_language`` and must
    contain both ``"es"`` and ``"fr"`` in ``targets``.

    NOTE(review): the pack path is relative, so this presumably expects the
    tests to run from the repository root -- confirm.
    """
    pack_spec = load_multilingual_qa_spec("domain-packs/mit-ocw-information-entropy")

    assert pack_spec["source_language"] == "en"
    for language_code in ("es", "fr"):
        assert language_code in pack_spec["targets"]
def test_multilingual_qa_for_text_accepts_spanish_preservation() -> None:
    """Run the text QA on a Spanish sentence and check the summary counts.

    For the sentence below, the summary must report at least two matched
    terms, exactly one matched caveat, and zero confusion hits.

    NOTE(review): the pack path is relative, so this presumably expects the
    tests to run from the repository root -- confirm.
    """
    pack_spec = load_multilingual_qa_spec("domain-packs/mit-ocw-information-entropy")
    spanish_text = "La entropía de Shannon no es idéntica a la entropía termodinámica, y la capacidad del canal impone otro límite."

    report = multilingual_qa_for_text(pack_spec, language="es", text=spanish_text)
    summary = report["summary"]

    assert summary["matched_term_count"] >= 2
    assert summary["matched_caveat_count"] == 1
    assert summary["confusion_hit_count"] == 0
def test_multilingual_qa_for_text_flags_confusion() -> None:
    """Run the text QA on a Spanish sentence that should trigger a confusion hit.

    The summary must report exactly one confusion hit, and at least one
    warning must contain "forbidden multilingual confusion" when compared
    case-insensitively.

    NOTE(review): the pack path is relative, so this presumably expects the
    tests to run from the repository root -- confirm.
    """
    pack_spec = load_multilingual_qa_spec("domain-packs/mit-ocw-information-entropy")
    confused_text = "La entropía de Shannon es idéntica a la entropía termodinámica."

    report = multilingual_qa_for_text(pack_spec, language="es", text=confused_text)

    assert report["summary"]["confusion_hit_count"] == 1

    # Lower-case each warning so the match does not depend on message casing.
    expected_fragment = "forbidden multilingual confusion"
    assert any(expected_fragment in message.lower() for message in report["warnings"])
def test_multilingual_qa_for_pack_handles_missing_spec(tmp_path: Path) -> None:
    """Run the pack-level QA against ``tmp_path`` and expect a missing-spec warning.

    At least one warning in the result must contain "no multilingual qa spec"
    when compared case-insensitively.

    Args:
        tmp_path: Directory passed as the pack location; presumably pytest's
            empty per-test temporary directory, so it holds no spec -- confirm.
    """
    report = multilingual_qa_for_pack(tmp_path, language="es", text="Texto de prueba.")

    # Lower-case each warning so the match does not depend on message casing.
    expected_fragment = "no multilingual qa spec"
    assert any(expected_fragment in message.lower() for message in report["warnings"])
def test_round_trip_warning_for_phrases_flags_drift() -> None:
    """Check the round-trip summary when one of two phrases is absent.

    Two source phrases are supplied, but the back-translated text contains
    only "Shannon entropy". The summary must report one round-trip warning
    and list "channel capacity" as the only drifted phrase.
    """
    source_phrases = ["Shannon entropy", "channel capacity"]
    back_translation = "This back translation only preserved Shannon entropy."

    report = round_trip_warning_for_phrases(source_phrases, back_translation)
    summary = report["summary"]

    assert summary["round_trip_warning_count"] == 1
    assert summary["drifted_phrases"] == ["channel capacity"]
def test_round_trip_source_phrases_use_canonical_source_text() -> None:
    """Check that the round-trip phrases requested for ``"es"`` are the English strings.

    The phrases returned for the OCW information-entropy spec must include
    the two term strings and the caveat sentence listed below.

    NOTE(review): the pack path is relative, so this presumably expects the
    tests to run from the repository root -- confirm.
    """
    pack_spec = load_multilingual_qa_spec("domain-packs/mit-ocw-information-entropy")
    source_phrases = round_trip_source_phrases(pack_spec, language="es")

    expected_phrases = (
        "Shannon entropy",
        "channel capacity",
        "Shannon entropy is not identical to thermodynamic entropy",
    )
    for phrase in expected_phrases:
        assert phrase in source_phrases