From d94249e0a8e786b668bfb7cff8944ccb410fd1ac Mon Sep 17 00:00:00 2001
From: welsberr
Date: Sun, 10 May 2026 12:43:01 -0400
Subject: [PATCH] Add hub bundle rebuild utility

---
 src/didactopus/hub_bundle_rebuild.py | 193 +++++++++++++++++++++++++++
 src/didactopus/main.py               |  11 +++
 tests/test_hub_bundle_rebuild.py     |  90 +++++++++++++++++
 3 files changed, 294 insertions(+)
 create mode 100644 src/didactopus/hub_bundle_rebuild.py
 create mode 100644 tests/test_hub_bundle_rebuild.py

diff --git a/src/didactopus/hub_bundle_rebuild.py b/src/didactopus/hub_bundle_rebuild.py
new file mode 100644
index 0000000..b2b6513
--- /dev/null
+++ b/src/didactopus/hub_bundle_rebuild.py
@@ -0,0 +1,193 @@
+"""Rebuild the support layer of a hub query bundle from its binding manifest."""
+
+from __future__ import annotations
+
+import json
+import re
+from collections import Counter
+from pathlib import Path
+from typing import Any
+
+from .notebook_page import export_notebook_page_from_groundrecall_bundle
+
+# Ordered (distinction_type, regex, cue) rules; the first match wins.
+_DISTINCTION_PATTERNS = (
+    ("non_implication", r"\bdoes not imply\b", "does not imply"),
+    ("decoupling", r"\b(can|may)\s+occur\s+without\b|\bwithout leading to evolution\b", "without leading to evolution"),
+    ("contrast", r"\bversus\b|\bvs\.?\b", "versus"),
+    ("contrast", r"\brather than\b", "rather than"),
+    ("contrast", r"\bdistinguish\b", "distinguish"),
+    ("contrast", r"\bnot\b.+\bbut\b", "not ... but"),
+    ("contrast", r"\bdoes not count as evolution\b", "does not count as evolution"),
+)
+
+
+def _load_json(path: Path) -> dict[str, Any]:
+    """Parse the UTF-8 JSON document stored at ``path``."""
+    return json.loads(path.read_text(encoding="utf-8"))
+
+
+def _slugify(text: str) -> str:
+    """Lowercase ``text`` and join its ASCII alphanumeric runs with hyphens."""
+    return re.sub(r"[^a-z0-9]+", "-", text.strip().lower()).strip("-")
+
+
+def _default_role(key: str, concept_id: str, title: str) -> str:
+    """Guess a source role from keyword hits in the key, concept id and title.
+
+    Nuance keywords are tested before mechanism keywords, so a name matching
+    both (e.g. one containing ``selection-and-evolution``) is ``"nuance"``.
+    Names matching neither list fall back to ``"overview"``.
+    """
+    joined = " ".join(part for part in (key, concept_id, title) if part).lower()
+    if any(token in joined for token in ("plasticity", "epigenetic", "adaptation", "neutral", "chance", "selection-and-evolution", "adaptationism")):
+        return "nuance"
+    if any(token in joined for token in ("selection", "drift", "mutation", "population-genetics", "speciation", "testing-natural-selection")):
+        return "mechanism"
+    return "overview"
+
+
+def _claim_distinction_payload(claim: dict[str, Any]) -> dict[str, Any] | None:
+    """Return a distinction record for ``claim``, or ``None`` if no cue matches.
+
+    The claim's ``claim_text`` is lowercased before it is matched against the
+    module's ordered cue patterns; the record keeps the original-case text.
+    """
+    text = str(claim.get("claim_text", "")).strip()
+    if not text:
+        return None
+    lowered = text.lower()
+    for distinction_type, pattern, cue in _DISTINCTION_PATTERNS:
+        if re.search(pattern, lowered):
+            return {
+                "claim_id": claim.get("claim_id", ""),
+                "distinction_type": distinction_type,
+                "cue": cue,
+                "text": text,
+            }
+    return None
+
+
+def rebuild_hub_bundle_from_binding(binding_path: str | Path) -> dict[str, Any]:
+    """Rebuild a hub bundle's support layer from its binding manifest.
+
+    Manifest paths are resolved relative to the manifest's directory. Each
+    ``supporting_artifacts`` entry whose key ends in ``_bundle`` and whose file
+    exists is read; missing files are skipped. The hub bundle file is then
+    rewritten in place with rebuilt ``source_artifacts``,
+    ``supporting_observations`` (at most two per support bundle, twelve in
+    total), ``source_role_summary``, ``key_distinctions`` (at most six, one per
+    distinct claim text) and ``related_concepts``. Finally
+    ``export_notebook_page_from_groundrecall_bundle`` is called with the hub
+    and notebook-page paths.
+
+    Args:
+        binding_path: Location of the hub binding manifest (JSON).
+
+    Returns:
+        A summary dict with the resolved hub and page paths, the rebuilt
+        counts, the role summary, and the ``["page"]["summary"]`` value of the
+        exporter's result.
+
+    Raises:
+        KeyError: If the manifest lacks ``primary_artifacts`` or either of its
+            ``groundrecall_query_bundle`` / ``notebook_page`` entries.
+    """
+    binding_file = Path(binding_path)
+    binding = _load_json(binding_file)
+    primary = binding["primary_artifacts"]
+    hub_path = (binding_file.parent / primary["groundrecall_query_bundle"]).resolve()
+    page_path = (binding_file.parent / primary["notebook_page"]).resolve()
+    hub = _load_json(hub_path)
+    # Loop-invariant, so computed once; ``or {}`` tolerates a JSON ``null``
+    # concept, matching how the support bundles are read below.
+    hub_concept_id = str((hub.get("concept") or {}).get("concept_id", "")).strip()
+
+    support_map = binding.get("supporting_artifacts", {}) or {}
+    support_entries: list[tuple[str, Path]] = [
+        (key, (binding_file.parent / rel).resolve())
+        for key, rel in support_map.items()
+        if key.endswith("_bundle")
+    ]
+
+    artifact_by_id: dict[str, dict[str, Any]] = {}
+    observation_rows: list[dict[str, Any]] = []
+    related_by_id: dict[str, dict[str, Any]] = {}
+    source_role_summary: Counter[str] = Counter()
+    distinctions: list[dict[str, Any]] = []
+    seen_obs_text: set[str] = set()
+    seen_distinction_text: set[str] = set()
+
+    for key, path in support_entries:
+        if not path.exists():
+            continue
+        payload = _load_json(path)
+        concept = payload.get("concept", {}) or {}
+        concept_id = str(concept.get("concept_id", "")).strip()
+        title = str(concept.get("title", "")).strip()
+        role = _default_role(key, concept_id, title)
+        source_role_summary[role] += 1
+
+        if concept_id and concept_id != hub_concept_id:
+            related_by_id[concept_id] = {
+                "id": concept_id,
+                "label": title or concept_id.replace("concept::", "", 1).replace("-", " ").title(),
+            }
+
+        for artifact in payload.get("source_artifacts", []) or []:
+            artifact_id = str(artifact.get("artifact_id", "")).strip()
+            if not artifact_id:
+                continue
+            merged = dict(artifact)
+            merged["source_role"] = merged.get("source_role") or role
+            artifact_by_id[artifact_id] = merged
+
+        # ``or []`` before slicing: a JSON ``null`` here would otherwise raise.
+        for obs in (payload.get("supporting_observations") or [])[:2]:
+            text = str(obs.get("text", "")).strip()
+            if not text or text in seen_obs_text:
+                continue
+            seen_obs_text.add(text)
+            merged = dict(obs)
+            merged["artifact_id"] = merged.get("artifact_id") or next(iter(concept.get("source_artifact_ids", []) or []), "")
+            merged["source_role"] = merged.get("source_role") or role
+            observation_rows.append(merged)
+
+        for claim in payload.get("relevant_claims", []) or []:
+            distinction = _claim_distinction_payload(claim)
+            if distinction is None or distinction["text"] in seen_distinction_text:
+                continue
+            # The same claim text may arrive through more than one support
+            # bundle; keep one copy so repeats do not use up the capped slots.
+            seen_distinction_text.add(distinction["text"])
+            distinctions.append(distinction)
+
+    existing_related = hub.get("related_concepts", []) or []
+    for item in existing_related:
+        concept_id = str(item.get("id", "") or item.get("concept_id", "")).strip()
+        label = str(item.get("label", "") or item.get("title", "")).strip()
+        if concept_id:
+            related_by_id.setdefault(concept_id, {"id": concept_id, "label": label})
+
+    hub["source_artifacts"] = list(artifact_by_id.values())
+    hub["supporting_observations"] = observation_rows[:12]
+    hub["source_role_summary"] = dict(sorted(source_role_summary.items()))
+    hub["key_distinctions"] = distinctions[:6]
+    hub["related_concepts"] = list(related_by_id.values())
+    notes = hub.get("bundle_notes", []) or []
+    note = "Supporting source artifacts and source-role summaries were rebuilt deterministically from the hub binding manifest."
+    if note not in notes:
+        notes.append(note)
+    hub["bundle_notes"] = notes
+    hub_path.write_text(json.dumps(hub, indent=2), encoding="utf-8")
+
+    page_summary = export_notebook_page_from_groundrecall_bundle(hub_path, page_path)
+    return {
+        "hub_bundle_path": str(hub_path),
+        "notebook_page_path": str(page_path),
+        "source_artifact_count": len(hub["source_artifacts"]),
+        "supporting_observation_count": len(hub["supporting_observations"]),
+        "source_role_summary": hub["source_role_summary"],
+        "distinction_count": len(hub["key_distinctions"]),
+        "page_summary": page_summary["page"]["summary"],
+    }
diff --git a/src/didactopus/main.py b/src/didactopus/main.py
index 8e2a5bc..35bcddb 100644
--- a/src/didactopus/main.py
+++ b/src/didactopus/main.py
@@ -10,6 +10,7 @@ from .groundrecall_pack_bridge import run_doclift_bundle_with_groundrecall
 from .augmentation_bundle_probe import write_probe_report
 from .archive_phrase_inventory import write_archive_phrase_inventory_report
 from .first_ring_batch_promotion import run_first_ring_batch_promotion
+from .hub_bundle_rebuild import rebuild_hub_bundle_from_binding
 from .notebook_page import export_notebook_page_from_groundrecall_bundle
 from .notebook_page import export_notebook_page_from_groundrecall_store
 from .review_loader import load_draft_pack
@@ -93,6 +94,12 @@ def build_parser() -> argparse.ArgumentParser:
     first_ring_parser.add_argument("manifest_path")
     first_ring_parser.add_argument("canonical_dir")
     first_ring_parser.add_argument("--output-dir")
+
+    hub_rebuild_parser = subparsers.add_parser(
+        "hub-bundle-rebuild",
+        help="Rebuild a hub bundle support layer from the bundle paths listed in a hub binding manifest",
+    )
+    hub_rebuild_parser.add_argument("binding_path")
     return parser
 
 
@@ -205,4 +212,8 @@ def main() -> None:
         )
         print(summary)
         return
+    if args.command == "hub-bundle-rebuild":
+        summary = rebuild_hub_bundle_from_binding(args.binding_path)
+        print(summary)
+        return
     build_parser().print_help()
diff --git a/tests/test_hub_bundle_rebuild.py b/tests/test_hub_bundle_rebuild.py
new file mode 100644
index 0000000..2bc70b0
--- /dev/null
+++ b/tests/test_hub_bundle_rebuild.py
@@ -0,0 +1,90 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from didactopus.hub_bundle_rebuild import rebuild_hub_bundle_from_binding
+
+
+def test_rebuild_hub_bundle_from_binding_updates_support_layer(tmp_path: Path) -> None:
+    root = tmp_path / "pilot" / "didactopus" / "notebook-page"
+    root.mkdir(parents=True)
+    export_dir = tmp_path / "pilot" / "groundrecall" / "export" / "canonical"
+    export_dir.mkdir(parents=True)
+
+    hub = {
+        "bundle_kind": "groundrecall_query_bundle",
+        "concept": {
+            "concept_id": "concept::hub",
+            "title": "Hub",
+            "aliases": [],
+            "description": "Hub concept",
+            "source_artifact_ids": ["ia_hub"],
+            "current_status": "reviewed",
+        },
+        "relevant_claims": [{"claim_id": "hc1", "claim_text": "Hub claim."}],
+        "relations": [],
+        "supporting_observations": [],
+        "source_artifacts": [],
+        "related_concepts": [],
+        "review_candidates": [],
+        "suggested_next_actions": [],
+        "bundle_notes": [],
+    }
+    support = {
+        "bundle_kind": "groundrecall_query_bundle",
+        "concept": {
+            "concept_id": "concept::natural-selection",
+            "title": "Natural Selection",
+            "aliases": [],
+            "description": "Support concept",
+            "source_artifact_ids": ["ia_sel"],
+            "current_status": "reviewed",
+        },
+        "relevant_claims": [
+            {"claim_id": "c1", "claim_text": "Natural selection can occur without leading to evolution if traits are not inherited."}
+        ],
+        "relations": [],
+        "supporting_observations": [
+            {
+                "observation_id": "o1",
+                "artifact_id": "ia_sel",
+                "text": "Natural selection can occur without leading to evolution if traits are not inherited.",
+                "role": "claim",
+                "origin_path": "documents/selection/document.md",
+                "grounding_status": "grounded",
+            }
+        ],
+        "source_artifacts": [
+            {
+                "artifact_id": "ia_sel",
+                "artifact_kind": "doclift_bundle_artifact",
+                "title": "document",
+                "path": "documents/selection/document.md",
+                "current_status": "reviewed",
+            }
+        ],
+        "related_concepts": [],
+    }
+    (export_dir / "groundrecall_query_bundle__hub.json").write_text(json.dumps(hub))
+    (export_dir / "query_bundle__natural-selection.json").write_text(json.dumps(support))
+    (root / "notebook_page__hub.json").write_text(json.dumps({"concept": {"concept_id": "concept::hub"}, "summary": {}}))
+    binding = {
+        "primary_artifacts": {
+            "groundrecall_query_bundle": "../../groundrecall/export/canonical/groundrecall_query_bundle__hub.json",
+            "notebook_page": "./notebook_page__hub.json",
+        },
+        "supporting_artifacts": {
+            "natural_selection_bundle": "../../groundrecall/export/canonical/query_bundle__natural-selection.json",
+        },
+    }
+    binding_path = root / "binding.json"
+    binding_path.write_text(json.dumps(binding))
+
+    result = rebuild_hub_bundle_from_binding(binding_path)
+
+    rebuilt = json.loads((export_dir / "groundrecall_query_bundle__hub.json").read_text())
+    assert result["source_artifact_count"] == 1
+    assert rebuilt["source_role_summary"]["mechanism"] == 1
+    assert len(rebuilt["key_distinctions"]) == 1
+    assert rebuilt["related_concepts"][0]["id"] == "concept::natural-selection"