GroundRecall/tests/test_doclift_claim_tourname...

53 lines
2.1 KiB
Python

from __future__ import annotations
from pathlib import Path
from groundrecall.doclift_claim_tournament import evaluate_doclift_claim_tracks
def _fixture_root() -> Path:
return Path(__file__).parent / "fixtures" / "doclift_claim_eval"
def _pilot_fixture_root() -> Path:
return Path(__file__).parent / "fixtures" / "doclift_claim_eval_pilot"
def test_doclift_claim_tournament_scores_two_tracks() -> None:
root = _fixture_root()
result = evaluate_doclift_claim_tracks(root, root / "benchmark.json")
assert result["judge_summary"]["winner"] in {"conservative", "balanced", "broad"}
assert set(result["judge_summary"]["tracks"].keys()) == {"conservative", "balanced", "broad"}
assert len(result["per_document"]) == 2
intro = next(item for item in result["per_document"] if item["document_id"] == "intro-essay")
assert len(intro["tracks"]) == 3
assert intro["tracks"][0]["predicted_claims"]
assert intro["tracks"][1]["predicted_claims"]
assert intro["tracks"][2]["predicted_claims"]
def test_doclift_claim_tournament_broad_track_improves_recall_on_fixture() -> None:
root = _fixture_root()
result = evaluate_doclift_claim_tracks(root, root / "benchmark.json")
tracks = result["judge_summary"]["tracks"]
assert tracks["broad"]["recall"] >= tracks["conservative"]["recall"]
assert tracks["broad"]["matches"] >= tracks["conservative"]["matches"]
assert tracks["balanced"]["precision"] >= tracks["conservative"]["precision"]
def test_doclift_claim_tournament_runs_on_real_corpus_fixture() -> None:
root = _pilot_fixture_root()
result = evaluate_doclift_claim_tracks(root, root / "benchmark.json")
tracks = result["judge_summary"]["tracks"]
assert len(result["per_document"]) == 2
assert tracks["conservative"]["gold_claims"] == 4
assert tracks["balanced"]["gold_claims"] == 4
assert tracks["broad"]["gold_claims"] == 4
assert tracks["broad"]["matches"] >= 1
assert tracks["balanced"]["matches"] >= 1
assert tracks["balanced"]["recall"] >= tracks["broad"]["recall"]
assert tracks["balanced"]["f1"] >= tracks["broad"]["f1"]