# Source listing: Didactopus/tests/test_archive_phrase_invento...
# (75 lines, 2.9 KiB, Python)

from __future__ import annotations
import json
from pathlib import Path
from didactopus.archive_phrase_inventory import build_archive_phrase_inventory
from didactopus.archive_phrase_inventory import write_archive_phrase_inventory_report
def test_build_archive_phrase_inventory_prioritizes_repeated_seeded_distinctions(tmp_path: Path) -> None:
    """Seeded phrases repeated across documents appear as prioritized concepts.

    Writes two markdown documents under ``bundle/documents`` that both mention
    "genetic drift", builds the inventory with three seed terms, and checks
    that the "genetic drift" row is flagged as a seed match and a translation
    priority, with a document count of at least two.
    """
    bundle_root = tmp_path / "bundle"
    documents_dir = bundle_root / "documents"

    # Folder name -> markdown body written to <folder>/document.md.
    fixture_documents = {
        "intro": (
            "# Introduction to Evolutionary Biology\n\n"
            "Natural selection is not identical to genetic drift. "
            "Common descent refers to the branching history of populations.\n"
        ),
        "drift": (
            "# Drift and Selection\n\n"
            "Genetic drift can change allele frequencies. "
            "Natural selection and genetic drift should be distinguished in explanation.\n"
        ),
    }
    for folder_name in fixture_documents:
        (documents_dir / folder_name).mkdir(parents=True)
    for folder_name, markdown in fixture_documents.items():
        (documents_dir / folder_name / "document.md").write_text(markdown)

    inventory = build_archive_phrase_inventory(
        [bundle_root],
        seed_terms=["natural selection", "genetic drift", "common descent"],
        top_n=10,
    )

    concepts = inventory["prioritized_concepts"]
    listed_phrases = [concept["phrase"] for concept in concepts]
    assert "natural selection" in listed_phrases
    assert "genetic drift" in listed_phrases

    # The membership assertion above guarantees at least one matching row.
    drift_rows = [concept for concept in concepts if concept["phrase"] == "genetic drift"]
    drift_row = drift_rows[0]
    assert drift_row["seed_match"] is True
    assert drift_row["translation_priority"] is True
    assert drift_row["document_count"] >= 2
def test_write_archive_phrase_inventory_report_writes_json_and_markdown(tmp_path: Path) -> None:
    """Writing a report produces the JSON file and a sibling ``.md`` file.

    Uses a single markdown note as input, then checks that both output files
    exist, that the JSON payload on disk reports one document, and that the
    returned summary reports at least one distinct phrase.
    """
    notes_path = tmp_path / "notes.md"
    notes_path.write_text(
        "# Heritable Change\n\n"
        "Phenotypic plasticity does not by itself imply heritable evolutionary change.\n"
    )
    json_path = tmp_path / "report.json"
    # The markdown report is expected next to the JSON one, same stem.
    markdown_path = json_path.with_suffix(".md")

    returned = write_archive_phrase_inventory_report(
        [notes_path],
        json_path,
        seed_terms=["phenotypic plasticity"],
        top_n=5,
    )

    assert json_path.exists()
    assert markdown_path.exists()

    on_disk = json.loads(json_path.read_text())
    assert on_disk["summary"]["document_count"] == 1
    assert returned["summary"]["distinct_phrase_count"] >= 1
def test_bundle_control_files_are_skipped(tmp_path: Path) -> None:
    """Only the markdown snippet in a bundle is counted as a document.

    The bundle holds two YAML files (``bundle.yaml`` and
    ``snippets/concept-alignment.yaml``) plus one markdown snippet. The
    inventory must report exactly one document and surface a phrase drawn
    from the markdown text.
    """
    bundle_dir = tmp_path / "augmentation"
    snippets_dir = bundle_dir / "snippets"
    snippets_dir.mkdir(parents=True)

    # YAML files that the document count below must not include.
    (bundle_dir / "bundle.yaml").write_text("title: test\n")
    (snippets_dir / "concept-alignment.yaml").write_text("items: []\n")
    # The only file expected to be inventoried.
    (snippets_dir / "plasticity.md").write_text(
        "# Plasticity\n\n"
        "Plasticity can mislead evolutionary inference if heredity is not checked.\n"
    )

    inventory = build_archive_phrase_inventory([bundle_dir], top_n=10)

    assert inventory["summary"]["document_count"] == 1
    listed_phrases = [concept["phrase"] for concept in inventory["prioritized_concepts"]]
    expected_fragments = ("plasticity", "evolutionary inference")
    assert any(
        fragment in phrase
        for phrase in listed_phrases
        for fragment in expected_fragments
    )