Add pilot corpus claim tournament fixture

This commit is contained in:
welsberr 2026-05-08 02:23:51 -04:00
parent 54ed7568b6
commit 169500369f
7 changed files with 116 additions and 0 deletions

View File

@ -0,0 +1,18 @@
{
"documents": [
{
"document_id": "introduction-to-evolutionary-biology",
"gold_claims": [
"Evolution is a change in the gene pool of a population over time.",
"Populations evolve, but individual organisms do not evolve during their lifetimes."
]
},
{
"document_id": "sandwalk-on-the-importance-of-random-genetic-drift-in-modern-evolutionary-theory",
"gold_claims": [
"Random genetic drift is a fundamental and important part of evolution.",
"Neutral and slightly deleterious alleles can be fixed in a population by random genetic drift."
]
}
]
}

View File

@ -0,0 +1,20 @@
{
"chunks": [
{
"chunk_id": "intro-pilot-body-1",
"role": "body",
"section": "What is Evolution?",
"text": "Evolution is a change in the gene pool of a population over time. A gene is a hereditary unit that can be passed on unaltered for many generations. The gene pool is the set of all genes in a species or population.",
"line_start": 1,
"line_end": 4
},
{
"chunk_id": "intro-pilot-body-2",
"role": "body",
"section": "What is Evolution?",
"text": "Populations evolve. In order to understand evolution, it is necessary to view populations as a collection of individuals, each harboring a different set of traits. Individual organisms do not evolve, they retain the same genes throughout their life.",
"line_start": 6,
"line_end": 9
}
]
}

View File

@ -0,0 +1,11 @@
# Introduction to Evolutionary Biology
Evolution is a change in the gene pool of a population over time. A
gene is a hereditary unit that can be passed on unaltered for many
generations. The gene pool is the set of all genes in a species or
population.
Populations evolve. In order to understand evolution, it is necessary
to view populations as a collection of individuals, each harboring a
different set of traits. Individual organisms do not evolve, they
retain the same genes throughout their life.

View File

@ -0,0 +1,20 @@
{
"chunks": [
{
"chunk_id": "drift-pilot-body-1",
"role": "body",
"section": "On the importance of random genetic drift in modern evolutionary theory",
"text": "The idea here is that drift is bad because it's an impediment to natural selection, but there's a lot more to random genetic drift than this. In fact, drift is a fundamental and important part of evolution.",
"line_start": 1,
"line_end": 4
},
{
"chunk_id": "drift-pilot-body-2",
"role": "body",
"section": "Nearly-neutral alleles can be fixed in a population",
"text": "Neutral and slightly deleterious alleles can be fixed in a population by random genetic drift. This is the important point that you must grasp if you are going to understand drift. It means that changes in the frequencies of alleles in a population can be due to drift and not just selection.",
"line_start": 6,
"line_end": 10
}
]
}

View File

@ -0,0 +1,12 @@
# Sandwalk: On the importance of random genetic drift in modern evolutionary theory
The idea here is that drift is bad because it's an impediment to
natural selection, but there's a lot more to random genetic drift
than this. In fact, drift is a fundamental and important part of
evolution.
Neutral and slightly deleterious alleles can be fixed in a population
by random genetic drift. This is the important point that you must
grasp if you are going to understand drift. It means that changes in
the frequencies of alleles in a population can be due to drift and
not just selection.

View File

@ -0,0 +1,20 @@
{
"documents": [
{
"document_id": "introduction-to-evolutionary-biology",
"title": "Introduction to Evolutionary Biology",
"document_kind": "web_article",
"output_dir": "documents/introduction-to-evolutionary-biology",
"markdown_path": "documents/introduction-to-evolutionary-biology/document.md",
"chunks_path": "documents/introduction-to-evolutionary-biology/document.chunks.json"
},
{
"document_id": "sandwalk-on-the-importance-of-random-genetic-drift-in-modern-evolutionary-theory",
"title": "Sandwalk: On the importance of random genetic drift in modern evolutionary theory",
"document_kind": "web_article",
"output_dir": "documents/sandwalk-on-the-importance-of-random-genetic-drift-in-modern-evolutionary-theory",
"markdown_path": "documents/sandwalk-on-the-importance-of-random-genetic-drift-in-modern-evolutionary-theory/document.md",
"chunks_path": "documents/sandwalk-on-the-importance-of-random-genetic-drift-in-modern-evolutionary-theory/document.chunks.json"
}
]
}

View File

@ -9,6 +9,10 @@ def _fixture_root() -> Path:
return Path(__file__).parent / "fixtures" / "doclift_claim_eval"
def _pilot_fixture_root() -> Path:
return Path(__file__).parent / "fixtures" / "doclift_claim_eval_pilot"
def test_doclift_claim_tournament_scores_two_tracks() -> None:
root = _fixture_root()
result = evaluate_doclift_claim_tracks(root, root / "benchmark.json")
@ -28,3 +32,14 @@ def test_doclift_claim_tournament_broad_track_improves_recall_on_fixture() -> No
assert tracks["broad"]["recall"] >= tracks["conservative"]["recall"]
assert tracks["broad"]["matches"] >= tracks["conservative"]["matches"]
def test_doclift_claim_tournament_runs_on_real_corpus_fixture() -> None:
    """Run the claim tournament over the pilot corpus fixture and pin its totals.

    The evaluation is invoked with the pilot fixture root and the
    ``benchmark.json`` file located directly inside it.
    """
    pilot_root = _pilot_fixture_root()
    outcome = evaluate_doclift_claim_tracks(pilot_root, pilot_root / "benchmark.json")
    track_scores = outcome["judge_summary"]["tracks"]

    # The result is expected to carry exactly two per-document entries.
    assert len(outcome["per_document"]) == 2

    # Both tracks must report the same gold-claim total of four.
    for track_name in ("conservative", "broad"):
        assert track_scores[track_name]["gold_claims"] == 4

    # Smoke check: the broad track has to match at least one gold claim.
    assert track_scores["broad"]["matches"] >= 1