Add hub bundle rebuild utility

This commit is contained in:
welsberr 2026-05-10 12:43:01 -04:00
parent ad46b9e667
commit d94249e0a8
3 changed files with 243 additions and 0 deletions

View File

@ -0,0 +1,142 @@
from __future__ import annotations
import json
import re
from collections import Counter
from pathlib import Path
from typing import Any
from .notebook_page import export_notebook_page_from_groundrecall_bundle
def _load_json(path: Path) -> dict[str, Any]:
return json.loads(path.read_text(encoding="utf-8"))
def _slugify(text: str) -> str:
return re.sub(r"[^a-z0-9]+", "-", text.strip().lower()).strip("-")
def _default_role(key: str, concept_id: str, title: str) -> str:
joined = " ".join(part for part in (key, concept_id, title) if part).lower()
if any(token in joined for token in ("plasticity", "epigenetic", "adaptation", "neutral", "chance", "selection-and-evolution", "adaptationism")):
return "nuance"
if any(token in joined for token in ("selection", "drift", "mutation", "population-genetics", "speciation", "testing-natural-selection")):
return "mechanism"
return "overview"
def _claim_distinction_payload(claim: dict[str, Any]) -> dict[str, Any] | None:
text = str(claim.get("claim_text", "")).strip()
lowered = text.lower()
if not text:
return None
patterns = [
("non_implication", r"\bdoes not imply\b", "does not imply"),
("decoupling", r"\b(can|may)\s+occur\s+without\b|\bwithout leading to evolution\b", "without leading to evolution"),
("contrast", r"\bversus\b|\bvs\.?\b", "versus"),
("contrast", r"\brather than\b", "rather than"),
("contrast", r"\bdistinguish\b", "distinguish"),
("contrast", r"\bnot\b.+\bbut\b", "not ... but"),
("contrast", r"\bdoes not count as evolution\b", "does not count as evolution"),
]
for distinction_type, pattern, cue in patterns:
if re.search(pattern, lowered):
return {
"claim_id": claim.get("claim_id", ""),
"distinction_type": distinction_type,
"cue": cue,
"text": text,
}
return None
def rebuild_hub_bundle_from_binding(binding_path: str | Path) -> dict[str, Any]:
    """Rebuild a hub query bundle's support layer from its binding manifest.

    Loads the binding manifest at *binding_path*, resolves the hub bundle and
    every supporting artifact whose manifest key ends in ``"_bundle"``, then
    deterministically regenerates the hub's ``source_artifacts``,
    ``supporting_observations`` (max 12, de-duplicated by text),
    ``source_role_summary``, ``key_distinctions`` (max 6), and
    ``related_concepts``. The hub bundle file is rewritten in place and the
    notebook page is re-exported from it.

    Parameters:
        binding_path: Path to the hub binding manifest JSON file. All
            artifact paths inside the manifest are resolved relative to it.

    Returns:
        A summary dict with the rewritten file paths, per-section counts,
        the role summary, and the exported page's summary payload.

    Raises:
        KeyError: if the manifest lacks the ``primary_artifacts`` entries.
        FileNotFoundError / json.JSONDecodeError: if the hub bundle cannot
            be read or parsed. Missing *supporting* bundles are skipped.
    """
    binding_file = Path(binding_path)
    binding = _load_json(binding_file)
    # Primary artifact paths are stored relative to the manifest's directory.
    hub_path = (binding_file.parent / binding["primary_artifacts"]["groundrecall_query_bundle"]).resolve()
    page_path = (binding_file.parent / binding["primary_artifacts"]["notebook_page"]).resolve()
    hub = _load_json(hub_path)
    support_map = binding.get("supporting_artifacts", {}) or {}
    # Only manifest keys ending in "_bundle" point at supporting query bundles.
    support_entries: list[tuple[str, Path]] = []
    for key, rel in support_map.items():
        if not key.endswith("_bundle"):
            continue
        support_entries.append((key, (binding_file.parent / rel).resolve()))
    artifact_by_id: dict[str, dict[str, Any]] = {}
    observation_rows: list[dict[str, Any]] = []
    related_by_id: dict[str, dict[str, Any]] = {}
    source_role_summary: Counter[str] = Counter()
    distinctions: list[dict[str, Any]] = []
    seen_obs_text: set[str] = set()
    # Loop-invariant: the hub's own concept id, used to keep the hub out of
    # its own related-concepts list.
    hub_concept_id = str(hub.get("concept", {}).get("concept_id", "")).strip()
    for key, path in support_entries:
        if not path.exists():
            # Best-effort rebuild: silently skip supporting bundles that
            # have not been exported yet.
            continue
        payload = _load_json(path)
        concept = payload.get("concept", {}) or {}
        concept_id = str(concept.get("concept_id", "")).strip()
        title = str(concept.get("title", "")).strip()
        role = _default_role(key, concept_id, title)
        source_role_summary[role] += 1
        if concept_id and concept_id != hub_concept_id:
            related_by_id[concept_id] = {
                "id": concept_id,
                # Derive a display label from the id when the title is empty.
                "label": title or concept_id.replace("concept::", "", 1).replace("-", " ").title(),
            }
        for artifact in payload.get("source_artifacts", []) or []:
            artifact_id = str(artifact.get("artifact_id", "")).strip()
            if not artifact_id:
                continue
            merged = dict(artifact)
            merged["source_role"] = merged.get("source_role") or role
            artifact_by_id[artifact_id] = merged
        # Take at most two observations per bundle. "or []" guards against an
        # explicit JSON null for the key (the other list reads already do);
        # the previous .get(..., [])[:2] form raised TypeError on null.
        for obs in (payload.get("supporting_observations") or [])[:2]:
            text = str(obs.get("text", "")).strip()
            if not text or text in seen_obs_text:
                continue
            seen_obs_text.add(text)
            merged = dict(obs)
            # Fall back to the concept's first source artifact id when the
            # observation does not carry one.
            merged["artifact_id"] = merged.get("artifact_id") or next(iter(concept.get("source_artifact_ids", []) or []), "")
            merged["source_role"] = merged.get("source_role") or role
            observation_rows.append(merged)
        for claim in payload.get("relevant_claims", []) or []:
            distinction = _claim_distinction_payload(claim)
            if distinction is not None:
                distinctions.append(distinction)
    # Preserve related concepts the hub already listed, without overwriting
    # the freshly derived entries (setdefault keeps the derived label).
    existing_related = hub.get("related_concepts", []) or []
    for item in existing_related:
        concept_id = str(item.get("id", "") or item.get("concept_id", "")).strip()
        label = str(item.get("label", "") or item.get("title", "")).strip()
        if concept_id:
            related_by_id.setdefault(concept_id, {"id": concept_id, "label": label})
    hub["source_artifacts"] = list(artifact_by_id.values())
    hub["supporting_observations"] = observation_rows[:12]
    hub["source_role_summary"] = dict(sorted(source_role_summary.items()))
    hub["key_distinctions"] = distinctions[:6]
    hub["related_concepts"] = list(related_by_id.values())
    notes = hub.get("bundle_notes", []) or []
    note = "Supporting source artifacts and source-role summaries were rebuilt deterministically from the hub binding manifest."
    if note not in notes:
        # Append the provenance note only once, so repeated rebuilds stay
        # idempotent.
        notes.append(note)
    hub["bundle_notes"] = notes
    hub_path.write_text(json.dumps(hub, indent=2), encoding="utf-8")
    page_summary = export_notebook_page_from_groundrecall_bundle(hub_path, page_path)
    return {
        "hub_bundle_path": str(hub_path),
        "notebook_page_path": str(page_path),
        "source_artifact_count": len(hub["source_artifacts"]),
        "supporting_observation_count": len(hub["supporting_observations"]),
        "source_role_summary": hub["source_role_summary"],
        "distinction_count": len(hub["key_distinctions"]),
        "page_summary": page_summary["page"]["summary"],
    }

View File

@ -10,6 +10,7 @@ from .groundrecall_pack_bridge import run_doclift_bundle_with_groundrecall
from .augmentation_bundle_probe import write_probe_report from .augmentation_bundle_probe import write_probe_report
from .archive_phrase_inventory import write_archive_phrase_inventory_report from .archive_phrase_inventory import write_archive_phrase_inventory_report
from .first_ring_batch_promotion import run_first_ring_batch_promotion from .first_ring_batch_promotion import run_first_ring_batch_promotion
from .hub_bundle_rebuild import rebuild_hub_bundle_from_binding
from .notebook_page import export_notebook_page_from_groundrecall_bundle from .notebook_page import export_notebook_page_from_groundrecall_bundle
from .notebook_page import export_notebook_page_from_groundrecall_store from .notebook_page import export_notebook_page_from_groundrecall_store
from .review_loader import load_draft_pack from .review_loader import load_draft_pack
@ -93,6 +94,12 @@ def build_parser() -> argparse.ArgumentParser:
first_ring_parser.add_argument("manifest_path") first_ring_parser.add_argument("manifest_path")
first_ring_parser.add_argument("canonical_dir") first_ring_parser.add_argument("canonical_dir")
first_ring_parser.add_argument("--output-dir") first_ring_parser.add_argument("--output-dir")
hub_rebuild_parser = subparsers.add_parser(
"hub-bundle-rebuild",
help="Rebuild a hub bundle support layer from the bundle paths listed in a hub binding manifest",
)
hub_rebuild_parser.add_argument("binding_path")
return parser return parser
@ -205,4 +212,8 @@ def main() -> None:
) )
print(summary) print(summary)
return return
if args.command == "hub-bundle-rebuild":
summary = rebuild_hub_bundle_from_binding(args.binding_path)
print(summary)
return
build_parser().print_help() build_parser().print_help()

View File

@ -0,0 +1,90 @@
from __future__ import annotations
import json
from pathlib import Path
from didactopus.hub_bundle_rebuild import rebuild_hub_bundle_from_binding
def test_rebuild_hub_bundle_from_binding_updates_support_layer(tmp_path: Path) -> None:
    """End-to-end: rebuilding from a binding manifest refreshes the hub bundle's
    source artifacts, role summary, distinctions, and related concepts."""
    page_dir = tmp_path / "pilot" / "didactopus" / "notebook-page"
    canonical_dir = tmp_path / "pilot" / "groundrecall" / "export" / "canonical"
    page_dir.mkdir(parents=True)
    canonical_dir.mkdir(parents=True)
    hub_bundle = {
        "bundle_kind": "groundrecall_query_bundle",
        "concept": {
            "concept_id": "concept::hub",
            "title": "Hub",
            "aliases": [],
            "description": "Hub concept",
            "source_artifact_ids": ["ia_hub"],
            "current_status": "reviewed",
        },
        "relevant_claims": [{"claim_id": "hc1", "claim_text": "Hub claim."}],
        "relations": [],
        "supporting_observations": [],
        "source_artifacts": [],
        "related_concepts": [],
        "review_candidates": [],
        "suggested_next_actions": [],
        "bundle_notes": [],
    }
    support_bundle = {
        "bundle_kind": "groundrecall_query_bundle",
        "concept": {
            "concept_id": "concept::natural-selection",
            "title": "Natural Selection",
            "aliases": [],
            "description": "Support concept",
            "source_artifact_ids": ["ia_sel"],
            "current_status": "reviewed",
        },
        "relevant_claims": [
            {"claim_id": "c1", "claim_text": "Natural selection can occur without leading to evolution if traits are not inherited."}
        ],
        "relations": [],
        "supporting_observations": [
            {
                "observation_id": "o1",
                "artifact_id": "ia_sel",
                "text": "Natural selection can occur without leading to evolution if traits are not inherited.",
                "role": "claim",
                "origin_path": "documents/selection/document.md",
                "grounding_status": "grounded",
            }
        ],
        "source_artifacts": [
            {
                "artifact_id": "ia_sel",
                "artifact_kind": "doclift_bundle_artifact",
                "title": "document",
                "path": "documents/selection/document.md",
                "current_status": "reviewed",
            }
        ],
        "related_concepts": [],
    }
    hub_file = canonical_dir / "groundrecall_query_bundle__hub.json"
    support_file = canonical_dir / "query_bundle__natural-selection.json"
    page_file = page_dir / "notebook_page__hub.json"
    hub_file.write_text(json.dumps(hub_bundle))
    support_file.write_text(json.dumps(support_bundle))
    page_file.write_text(json.dumps({"concept": {"concept_id": "concept::hub"}, "summary": {}}))
    binding_manifest = {
        "primary_artifacts": {
            "groundrecall_query_bundle": "../../groundrecall/export/canonical/groundrecall_query_bundle__hub.json",
            "notebook_page": "./notebook_page__hub.json",
        },
        "supporting_artifacts": {
            "natural_selection_bundle": "../../groundrecall/export/canonical/query_bundle__natural-selection.json",
        },
    }
    manifest_path = page_dir / "binding.json"
    manifest_path.write_text(json.dumps(binding_manifest))
    summary = rebuild_hub_bundle_from_binding(manifest_path)
    updated_hub = json.loads(hub_file.read_text())
    assert summary["source_artifact_count"] == 1
    assert updated_hub["source_role_summary"]["mechanism"] == 1
    assert len(updated_hub["key_distinctions"]) == 1
    assert updated_hub["related_concepts"][0]["id"] == "concept::natural-selection"