From 44954a4ec9cddae0fc1ea6ea7a83b10343733bfe Mon Sep 17 00:00:00 2001
From: welsberr
Date: Sun, 10 May 2026 14:09:12 -0400
Subject: [PATCH] Replace placeholder first-ring bundles

When a manifest entry declares compose_from, stop reporting a bundle that
already sits at the target path as "existing" if that bundle has no
relevant_claims. Such a bundle is treated as a placeholder, so the entry
falls through to the later branches; the new test expects it to be
rewritten from the referenced bundle and reported as "synthesized".

---
Reviewer note: this patch was recovered from a copy whose line breaks and
leading whitespace had been collapsed. The hunk line counts and the diffstat
were used to restore the line structure. Indentation is assumed, not known:
in particular the nesting depth of the run_first_ring_batch_promotion hunk
and the YAML nesting inside the test manifest. Check both against the tree
before applying.

 src/didactopus/first_ring_batch_promotion.py | 10 ++-
 tests/test_first_ring_batch_promotion.py     | 70 ++++++++++++++++++++
 2 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/src/didactopus/first_ring_batch_promotion.py b/src/didactopus/first_ring_batch_promotion.py
index 551210e..2d5536b 100644
--- a/src/didactopus/first_ring_batch_promotion.py
+++ b/src/didactopus/first_ring_batch_promotion.py
@@ -62,6 +62,10 @@ def _find_existing_bundle(index: dict[str, dict[str, Any]], concept_slug: str) -
     return None
 
 
+def _is_placeholder_bundle(payload: dict[str, Any]) -> bool:
+    return not (payload.get("relevant_claims", []) or [])
+
+
 def _claim_matches(claim: dict[str, Any], keyword_phrases: list[str]) -> bool:
     text = str(claim.get("claim_text", "")).lower()
     return any(phrase in text for phrase in keyword_phrases)
@@ -202,7 +206,11 @@ def run_first_ring_batch_promotion(
         concept_slug = str(entry["concept"]).strip()
         target_path = output / f"query_bundle__{concept_slug}.json"
         existing = _find_existing_bundle(bundle_index, concept_slug)
-        if existing and existing["path"].resolve() == target_path.resolve():
+        if (
+            existing
+            and existing["path"].resolve() == target_path.resolve()
+            and not (entry.get("compose_from") and _is_placeholder_bundle(existing["payload"]))
+        ):
             payload = existing["payload"]
             status = "existing"
         elif existing and not entry.get("compose_from"):
diff --git a/tests/test_first_ring_batch_promotion.py b/tests/test_first_ring_batch_promotion.py
index 6f906ba..d08e8b4 100644
--- a/tests/test_first_ring_batch_promotion.py
+++ b/tests/test_first_ring_batch_promotion.py
@@ -96,3 +96,73 @@ promotion_priority:
     synth = json.loads((canonical / "query_bundle__natural-selection.json").read_text())
     assert synth["concept"]["concept_id"] == "concept::natural-selection"
     assert len(synth["relevant_claims"]) == 2
+
+
+def test_first_ring_batch_promotion_replaces_placeholder_bundle_when_compose_from_is_added(tmp_path: Path) -> None:
+    canonical = tmp_path / "canonical"
+    canonical.mkdir()
+    placeholder_payload = {
+        "bundle_kind": "groundrecall_query_bundle",
+        "query_type": "concept",
+        "concept": {
+            "concept_id": "concept::gene-pool",
+            "title": "Gene Pool",
+            "aliases": [],
+            "description": "Placeholder bundle.",
+            "source_artifact_ids": [],
+            "current_status": "reviewed",
+        },
+        "relevant_claims": [],
+        "relations": [],
+        "supporting_observations": [],
+        "source_artifacts": [],
+        "related_concepts": [],
+    }
+    source_payload = {
+        "bundle_kind": "groundrecall_query_bundle",
+        "query_type": "concept",
+        "concept": {
+            "concept_id": "concept::variation",
+            "title": "Variation",
+            "aliases": [],
+            "description": "Source bundle.",
+            "source_artifact_ids": ["a1"],
+            "current_status": "reviewed",
+        },
+        "relevant_claims": [
+            {
+                "claim_id": "c1",
+                "claim_text": "Evolution is a change in the gene pool of a population over time.",
+                "source_observation_ids": ["o1"],
+                "metadata": {},
+            }
+        ],
+        "relations": [],
+        "supporting_observations": [{"observation_id": "o1", "text": "Evolution is a change in the gene pool of a population over time."}],
+        "source_artifacts": [{"artifact_id": "a1", "title": "doc"}],
+        "related_concepts": [],
+    }
+    (canonical / "query_bundle__gene-pool.json").write_text(json.dumps(placeholder_payload))
+    (canonical / "query_bundle__variation.json").write_text(json.dumps(source_payload))
+
+    manifest = tmp_path / "manifest.yaml"
+    manifest.write_text(
+        """
+promotion_priority:
+  tier_3:
+    - concept: gene-pool
+      label: Gene Pool
+      compose_from:
+        bundle_refs:
+          - query_bundle__variation.json
+        keyword_phrases:
+          - gene pool
+"""
+    )
+
+    run_first_ring_batch_promotion(manifest, canonical)
+    report = json.loads((canonical / "first_ring_batch_promotion_report.json").read_text())
+    statuses = {item["concept"]: item["status"] for item in report["generated"]}
+    assert statuses["gene-pool"] == "synthesized"
+    synth = json.loads((canonical / "query_bundle__gene-pool.json").read_text())
+    assert len(synth["relevant_claims"]) == 1