75 lines
2.9 KiB
Python
75 lines
2.9 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
from didactopus.archive_phrase_inventory import build_archive_phrase_inventory
|
|
from didactopus.archive_phrase_inventory import write_archive_phrase_inventory_report
|
|
|
|
|
|
def test_build_archive_phrase_inventory_prioritizes_repeated_seeded_distinctions(tmp_path: Path) -> None:
    """Seeded phrases repeated across documents appear in the prioritized concepts.

    Builds a bundle with two ``document.md`` files that both mention
    "genetic drift", runs the inventory with three seed terms, and checks
    that the "genetic drift" row is flagged as a seed match and a
    translation priority and is counted in at least two documents.
    """
    bundle_root = tmp_path / "bundle"
    documents_dir = bundle_root / "documents"

    # Folder name -> markdown body written to <folder>/document.md.
    fixtures = {
        "intro": (
            "# Introduction to Evolutionary Biology\n\n"
            "Natural selection is not identical to genetic drift. "
            "Common descent refers to the branching history of populations.\n"
        ),
        "drift": (
            "# Drift and Selection\n\n"
            "Genetic drift can change allele frequencies. "
            "Natural selection and genetic drift should be distinguished in explanation.\n"
        ),
    }
    for folder_name, markdown in fixtures.items():
        folder = documents_dir / folder_name
        folder.mkdir(parents=True)
        (folder / "document.md").write_text(markdown)

    seeds = ["natural selection", "genetic drift", "common descent"]
    report = build_archive_phrase_inventory([bundle_root], seed_terms=seeds, top_n=10)

    concepts = report["prioritized_concepts"]
    found_phrases = [entry["phrase"] for entry in concepts]
    assert "natural selection" in found_phrases
    assert "genetic drift" in found_phrases

    # Inspect the first row reported for the phrase present in both documents.
    drift_row = next(entry for entry in concepts if entry["phrase"] == "genetic drift")
    assert drift_row["seed_match"] is True
    assert drift_row["translation_priority"] is True
    assert drift_row["document_count"] >= 2
|
|
|
|
|
|
def test_write_archive_phrase_inventory_report_writes_json_and_markdown(tmp_path: Path) -> None:
    """Writing a report produces the JSON file and a sibling ``.md`` file.

    Runs the writer on a single markdown source and checks that both
    output files exist, that the JSON payload records one document, and
    that the returned summary reports at least one distinct phrase.
    """
    notes_path = tmp_path / "notes.md"
    notes_path.write_text(
        "# Heritable Change\n\n"
        "Phenotypic plasticity does not by itself imply heritable evolutionary change.\n"
    )
    json_path = tmp_path / "report.json"
    markdown_path = json_path.with_suffix(".md")

    result = write_archive_phrase_inventory_report(
        [notes_path],
        json_path,
        seed_terms=["phenotypic plasticity"],
        top_n=5,
    )

    assert json_path.exists()
    assert markdown_path.exists()

    # Compare what was written to disk with what the call returned.
    with json_path.open() as handle:
        on_disk = json.load(handle)
    assert on_disk["summary"]["document_count"] == 1
    assert result["summary"]["distinct_phrase_count"] >= 1
|
|
|
|
|
|
def test_bundle_control_files_are_skipped(tmp_path: Path) -> None:
    """Only the markdown snippet in a bundle is counted as a document.

    The bundle holds two YAML files (``bundle.yaml`` and
    ``snippets/concept-alignment.yaml``) next to one markdown snippet; the
    inventory is expected to report a document count of exactly one and to
    surface a phrase drawn from the markdown text.
    """
    bundle_dir = tmp_path / "augmentation"
    snippets_dir = bundle_dir / "snippets"
    snippets_dir.mkdir(parents=True)

    # YAML files that the assertions below expect to be left out of the count.
    yaml_files = {
        bundle_dir / "bundle.yaml": "title: test\n",
        snippets_dir / "concept-alignment.yaml": "items: []\n",
    }
    for yaml_path, yaml_text in yaml_files.items():
        yaml_path.write_text(yaml_text)

    snippet_text = (
        "# Plasticity\n\n"
        "Plasticity can mislead evolutionary inference if heredity is not checked.\n"
    )
    (snippets_dir / "plasticity.md").write_text(snippet_text)

    report = build_archive_phrase_inventory([bundle_dir], top_n=10)

    assert report["summary"]["document_count"] == 1
    found_phrases = [entry["phrase"] for entry in report["prioritized_concepts"]]
    expected_fragments = ("plasticity", "evolutionary inference")
    assert any(
        fragment in candidate
        for candidate in found_phrases
        for fragment in expected_fragments
    )
|