# File-viewer metadata (extraction residue): 173 lines, 6.3 KiB, Python.
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
from didactopus.notebook_promotion_pipeline import run_notebook_promotion_pipeline
|
|
|
|
|
|
def _write_json(path: Path, payload: dict) -> None:
    """Serialize *payload* as JSON to *path*, always encoded as UTF-8."""
    path.write_text(json.dumps(payload), encoding="utf-8")


def test_notebook_promotion_pipeline_runs_end_to_end(tmp_path: Path) -> None:
    """Run the notebook promotion pipeline against a minimal on-disk pilot tree.

    The fixture built under ``tmp_path / "pilot"`` consists of:

    * one normalized source document (``document.md``),
    * three canonical query bundles (a hub, a populated source bundle and an
      empty ``gene-pool`` placeholder),
    * a notebook page plus a ``binding.json`` pointing at the hub bundle, the
      notebook page and the placeholder bundle,
    * a promotion manifest that names ``gene-pool`` and refers to the source
      bundle with the keyword phrase ``gene pool``.

    After the pipeline runs, the test checks the returned report path, that
    the phrase inventory file exists, two counters in the written report, and
    that the ``gene-pool`` bundle on disk now holds exactly one relevant claim.
    """
    pilot = tmp_path / "pilot"

    # --- normalized source document -------------------------------------
    docs_dir = pilot / "normalized" / "seed-bundle" / "documents" / "source-one"
    docs_dir.mkdir(parents=True)
    (docs_dir / "document.md").write_text(
        "# Source One\n\nNatural selection can occur without leading to evolution if traits are not inherited. "
        "Evolution is a change in the gene pool of a population over time.\n",
        encoding="utf-8",
    )

    # --- output directories ---------------------------------------------
    export_dir = pilot / "groundrecall" / "export" / "canonical"
    export_dir.mkdir(parents=True)
    notebook_dir = pilot / "didactopus" / "notebook-page"
    notebook_dir.mkdir(parents=True)

    # --- canonical query bundles ----------------------------------------
    hub = {
        "bundle_kind": "groundrecall_query_bundle",
        "query_type": "concept",
        "concept": {
            "concept_id": "concept::hub",
            "title": "Hub",
            "aliases": [],
            "description": "Hub concept",
            "source_artifact_ids": ["ia_hub"],
            "current_status": "reviewed",
        },
        "relevant_claims": [{"claim_id": "hc1", "claim_text": "Hub claim."}],
        "relations": [],
        "supporting_observations": [],
        "source_artifacts": [],
        "related_concepts": [],
        "review_candidates": [],
        "suggested_next_actions": [],
        "bundle_notes": [],
    }
    # The source bundle carries two grounded claims; only the first one
    # mentions the manifest's keyword phrase "gene pool".
    source_bundle = {
        "bundle_kind": "groundrecall_query_bundle",
        "query_type": "concept",
        "concept": {
            "concept_id": "concept::source",
            "title": "Source Concept",
            "aliases": [],
            "description": "Source concept",
            "source_artifact_ids": ["ia_src"],
            "current_status": "reviewed",
        },
        "relevant_claims": [
            {
                "claim_id": "c1",
                "claim_text": "Evolution is a change in the gene pool of a population over time.",
                "source_observation_ids": ["o1"],
                "metadata": {},
            },
            {
                "claim_id": "c2",
                "claim_text": "Natural selection can occur without leading to evolution if traits are not inherited.",
                "source_observation_ids": ["o2"],
                "metadata": {},
            },
        ],
        "relations": [],
        "supporting_observations": [
            {
                "observation_id": "o1",
                "artifact_id": "ia_src",
                "text": "Evolution is a change in the gene pool of a population over time.",
                "role": "claim",
                "origin_path": "documents/source-one/document.md",
                "grounding_status": "grounded",
            },
            {
                "observation_id": "o2",
                "artifact_id": "ia_src",
                "text": "Natural selection can occur without leading to evolution if traits are not inherited.",
                "role": "claim",
                "origin_path": "documents/source-one/document.md",
                "grounding_status": "grounded",
            },
        ],
        "source_artifacts": [
            {
                "artifact_id": "ia_src",
                "artifact_kind": "doclift_bundle_artifact",
                "title": "document",
                "path": "documents/source-one/document.md",
                "current_status": "reviewed",
            }
        ],
        "related_concepts": [],
    }
    # Empty bundle for the concept named in the manifest; the test re-reads
    # this file after the pipeline has run.
    placeholder = {
        "bundle_kind": "groundrecall_query_bundle",
        "query_type": "concept",
        "concept": {
            "concept_id": "concept::gene-pool",
            "title": "Gene Pool",
            "aliases": [],
            "description": "Placeholder",
            "source_artifact_ids": [],
            "current_status": "reviewed",
        },
        "relevant_claims": [],
        "relations": [],
        "supporting_observations": [],
        "source_artifacts": [],
        "related_concepts": [],
        "review_candidates": [],
        "suggested_next_actions": [],
        "bundle_notes": [],
    }
    _write_json(export_dir / "groundrecall_query_bundle__hub.json", hub)
    _write_json(export_dir / "query_bundle__source.json", source_bundle)
    _write_json(export_dir / "query_bundle__gene-pool.json", placeholder)
    # Written with an explicit UTF-8 encoding like every other fixture file,
    # rather than the platform default.
    _write_json(
        notebook_dir / "notebook_page__hub.json",
        {"concept": {"concept_id": "concept::hub"}, "summary": {}},
    )

    # --- binding: artifact paths are relative to the notebook directory --
    binding = {
        "primary_artifacts": {
            "groundrecall_query_bundle": "../../groundrecall/export/canonical/groundrecall_query_bundle__hub.json",
            "notebook_page": "./notebook_page__hub.json",
        },
        "supporting_artifacts": {
            "gene_pool_bundle": "../../groundrecall/export/canonical/query_bundle__gene-pool.json",
        },
    }
    binding_path = notebook_dir / "binding.json"
    _write_json(binding_path, binding)

    # --- promotion manifest ---------------------------------------------
    # NOTE(review): the nesting below is the conventional reading of the key
    # order (tier -> list of concepts -> compose_from -> refs / phrases);
    # confirm it against the manifest schema the pipeline expects.
    manifest = pilot / "manifests" / "first-ring-promotion-batch.yaml"
    manifest.parent.mkdir(parents=True)
    manifest.write_text(
        """
promotion_priority:
  tier_3:
    - concept: gene-pool
      label: Gene Pool
      compose_from:
        bundle_refs:
          - query_bundle__source.json
        keyword_phrases:
          - gene pool
""",
        encoding="utf-8",
    )

    # --- run the pipeline -------------------------------------------------
    report_path = pilot / "reports" / "pipeline.json"
    phrase_path = pilot / "reports" / "phrases.json"
    result = run_notebook_promotion_pipeline(
        binding_path=binding_path,
        manifest_path=manifest,
        canonical_dir=export_dir,
        output_path=report_path,
        phrase_inventory_output=phrase_path,
        phrase_inputs=[pilot / "normalized" / "seed-bundle"],
        seed_terms=["gene pool", "natural selection"],
        top_n=10,
    )

    # --- verify outputs ---------------------------------------------------
    report = json.loads(report_path.read_text(encoding="utf-8"))
    rebuilt_bundle = json.loads(
        (export_dir / "query_bundle__gene-pool.json").read_text(encoding="utf-8")
    )
    assert result["report_path"] == str(report_path)
    assert phrase_path.exists()
    assert report["batch_promotion"]["weak_node_count"] == 1
    assert report["delta"]["hub"]["related_concept_count"] == 1
    # Exactly one claim ends up in the rebuilt bundle; presumably the one
    # matching the "gene pool" keyword phrase -- verify against the pipeline.
    assert len(rebuilt_bundle["relevant_claims"]) == 1
|