Add pilot corpus claim tournament fixture
This commit is contained in:
parent
54ed7568b6
commit
169500369f
|
|
@ -0,0 +1,18 @@
|
||||||
|
{
|
||||||
|
"documents": [
|
||||||
|
{
|
||||||
|
"document_id": "introduction-to-evolutionary-biology",
|
||||||
|
"gold_claims": [
|
||||||
|
"Evolution is a change in the gene pool of a population over time.",
|
||||||
|
"Populations evolve, but individual organisms do not evolve during their lifetimes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"document_id": "sandwalk-on-the-importance-of-random-genetic-drift-in-modern-evolutionary-theory",
|
||||||
|
"gold_claims": [
|
||||||
|
"Random genetic drift is a fundamental and important part of evolution.",
|
||||||
|
"Neutral and slightly deleterious alleles can be fixed in a population by random genetic drift."
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
{
|
||||||
|
"chunks": [
|
||||||
|
{
|
||||||
|
"chunk_id": "intro-pilot-body-1",
|
||||||
|
"role": "body",
|
||||||
|
"section": "What is Evolution?",
|
||||||
|
"text": "Evolution is a change in the gene pool of a population over time. A gene is a hereditary unit that can be passed on unaltered for many generations. The gene pool is the set of all genes in a species or population.",
|
||||||
|
"line_start": 1,
|
||||||
|
"line_end": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"chunk_id": "intro-pilot-body-2",
|
||||||
|
"role": "body",
|
||||||
|
"section": "What is Evolution?",
|
||||||
|
"text": "Populations evolve. In order to understand evolution, it is necessary to view populations as a collection of individuals, each harboring a different set of traits. Individual organisms do not evolve, they retain the same genes throughout their life.",
|
||||||
|
"line_start": 6,
|
||||||
|
"line_end": 9
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,11 @@
|
||||||
|
# Introduction to Evolutionary Biology
|
||||||
|
|
||||||
|
Evolution is a change in the gene pool of a population over time. A
|
||||||
|
gene is a hereditary unit that can be passed on unaltered for many
|
||||||
|
generations. The gene pool is the set of all genes in a species or
|
||||||
|
population.
|
||||||
|
|
||||||
|
Populations evolve. In order to understand evolution, it is necessary
|
||||||
|
to view populations as a collection of individuals, each harboring a
|
||||||
|
different set of traits. Individual organisms do not evolve, they
|
||||||
|
retain the same genes throughout their life.
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
{
|
||||||
|
"chunks": [
|
||||||
|
{
|
||||||
|
"chunk_id": "drift-pilot-body-1",
|
||||||
|
"role": "body",
|
||||||
|
"section": "On the importance of random genetic drift in modern evolutionary theory",
|
||||||
|
"text": "The idea here is that drift is bad because it's an impediment to natural selection, but there's a lot more to random genetic drift than this. In fact, drift is a fundamental and important part of evolution.",
|
||||||
|
"line_start": 1,
|
||||||
|
"line_end": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"chunk_id": "drift-pilot-body-2",
|
||||||
|
"role": "body",
|
||||||
|
"section": "Nearly-neutral alleles can be fixed in a population",
|
||||||
|
"text": "Neutral and slightly deleterious alleles can be fixed in a population by random genetic drift. This is the important point that you must grasp if you are going to understand drift. It means that changes in the frequencies of alleles in a population can be due to drift and not just selection.",
|
||||||
|
"line_start": 6,
|
||||||
|
"line_end": 10
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
# Sandwalk: On the importance of random genetic drift in modern evolutionary theory
|
||||||
|
|
||||||
|
The idea here is that drift is bad because it's an impediment to
|
||||||
|
natural selection, but there's a lot more to random genetic drift
|
||||||
|
than this. In fact, drift is a fundamental and important part of
|
||||||
|
evolution.
|
||||||
|
|
||||||
|
Neutral and slightly deleterious alleles can be fixed in a population
|
||||||
|
by random genetic drift. This is the important point that you must
|
||||||
|
grasp if you are going to understand drift. It means that changes in
|
||||||
|
the frequencies of alleles in a population can be due to drift and
|
||||||
|
not just selection.
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
{
|
||||||
|
"documents": [
|
||||||
|
{
|
||||||
|
"document_id": "introduction-to-evolutionary-biology",
|
||||||
|
"title": "Introduction to Evolutionary Biology",
|
||||||
|
"document_kind": "web_article",
|
||||||
|
"output_dir": "documents/introduction-to-evolutionary-biology",
|
||||||
|
"markdown_path": "documents/introduction-to-evolutionary-biology/document.md",
|
||||||
|
"chunks_path": "documents/introduction-to-evolutionary-biology/document.chunks.json"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"document_id": "sandwalk-on-the-importance-of-random-genetic-drift-in-modern-evolutionary-theory",
|
||||||
|
"title": "Sandwalk: On the importance of random genetic drift in modern evolutionary theory",
|
||||||
|
"document_kind": "web_article",
|
||||||
|
"output_dir": "documents/sandwalk-on-the-importance-of-random-genetic-drift-in-modern-evolutionary-theory",
|
||||||
|
"markdown_path": "documents/sandwalk-on-the-importance-of-random-genetic-drift-in-modern-evolutionary-theory/document.md",
|
||||||
|
"chunks_path": "documents/sandwalk-on-the-importance-of-random-genetic-drift-in-modern-evolutionary-theory/document.chunks.json"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -9,6 +9,10 @@ def _fixture_root() -> Path:
|
||||||
return Path(__file__).parent / "fixtures" / "doclift_claim_eval"
|
return Path(__file__).parent / "fixtures" / "doclift_claim_eval"
|
||||||
|
|
||||||
|
|
||||||
|
def _pilot_fixture_root() -> Path:
    """Return the directory that holds the pilot claim-eval fixtures.

    The path is resolved relative to this test module, so it does not
    depend on the current working directory.
    """
    return Path(__file__).parent / "fixtures" / "doclift_claim_eval_pilot"
|
||||||
|
|
||||||
|
|
||||||
def test_doclift_claim_tournament_scores_two_tracks() -> None:
|
def test_doclift_claim_tournament_scores_two_tracks() -> None:
|
||||||
root = _fixture_root()
|
root = _fixture_root()
|
||||||
result = evaluate_doclift_claim_tracks(root, root / "benchmark.json")
|
result = evaluate_doclift_claim_tracks(root, root / "benchmark.json")
|
||||||
|
|
@ -28,3 +32,14 @@ def test_doclift_claim_tournament_broad_track_improves_recall_on_fixture() -> No
|
||||||
|
|
||||||
assert tracks["broad"]["recall"] >= tracks["conservative"]["recall"]
|
assert tracks["broad"]["recall"] >= tracks["conservative"]["recall"]
|
||||||
assert tracks["broad"]["matches"] >= tracks["conservative"]["matches"]
|
assert tracks["broad"]["matches"] >= tracks["conservative"]["matches"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_doclift_claim_tournament_runs_on_real_corpus_fixture() -> None:
    """Smoke-test the claim tournament against the pilot corpus fixture.

    Checks that every document in the pilot fixture is evaluated, that both
    tracks see the full set of gold claims, and that the broad track finds
    at least one match.
    """
    root = _pilot_fixture_root()
    result = evaluate_doclift_claim_tracks(root, root / "benchmark.json")
    tracks = result["judge_summary"]["tracks"]

    # The pilot fixture is expected to hold two documents with two gold
    # claims each, hence 2 per-document entries and 4 gold claims per track.
    # NOTE(review): confirm these counts against the pilot benchmark.json.
    assert len(result["per_document"]) == 2
    assert tracks["conservative"]["gold_claims"] == 4
    assert tracks["broad"]["gold_claims"] == 4
    # Only a lower bound: this asserts the pipeline produces matches on the
    # pilot text, not a specific score.
    assert tracks["broad"]["matches"] >= 1
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue