Add hub bundle rebuild utility
This commit is contained in:
parent
ad46b9e667
commit
d94249e0a8
|
|
@ -0,0 +1,142 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .notebook_page import export_notebook_page_from_groundrecall_bundle
|
||||
|
||||
|
||||
def _load_json(path: Path) -> dict[str, Any]:
    """Decode the UTF-8 JSON document stored at *path* and return it."""
    with path.open("r", encoding="utf-8") as handle:
        return json.load(handle)
|
||||
|
||||
|
||||
def _slugify(text: str) -> str:
    """Return a lowercase, hyphen-separated slug of *text*.

    Surrounding whitespace is stripped and the text is lowercased; every run
    of characters outside ``a-z0-9`` collapses into a single hyphen, and any
    leading or trailing hyphens are removed from the result.
    """
    normalized = text.strip().lower()
    hyphenated = re.sub(r"[^a-z0-9]+", "-", normalized)
    return hyphenated.strip("-")
|
||||
|
||||
|
||||
def _default_role(key: str, concept_id: str, title: str) -> str:
    """Pick a default source role from a bundle's key, concept id and title.

    The non-empty inputs are joined with spaces and lowercased, then searched
    for known substrings. Returns ``"nuance"``, ``"mechanism"`` or, when no
    token is found, ``"overview"``.
    """
    # Rule order is significant: the "nuance" tokens are tested first, so a
    # name such as "selection-and-evolution" is classified as nuance even
    # though it also contains the "mechanism" token "selection".
    role_rules = (
        (
            "nuance",
            (
                "plasticity",
                "epigenetic",
                "adaptation",
                "neutral",
                "chance",
                "selection-and-evolution",
                "adaptationism",
            ),
        ),
        (
            "mechanism",
            (
                "selection",
                "drift",
                "mutation",
                "population-genetics",
                "speciation",
                "testing-natural-selection",
            ),
        ),
    )
    haystack = " ".join(filter(None, (key, concept_id, title))).lower()
    for role, tokens in role_rules:
        for token in tokens:
            if token in haystack:
                return role
    return "overview"
|
||||
|
||||
|
||||
def _claim_distinction_payload(claim: dict[str, Any]) -> dict[str, Any] | None:
    """Describe the distinction a claim expresses, if its text carries a cue.

    The claim's ``claim_text`` is lowercased and tested against an ordered
    list of regular expressions; the first one that matches decides the
    outcome.

    Returns a dict with ``claim_id``, ``distinction_type``, ``cue`` and the
    stripped original ``text``, or ``None`` when the text is empty or no
    pattern matches.
    """
    claim_text = str(claim.get("claim_text", "")).strip()
    if not claim_text:
        return None

    # (distinction type, regex applied to the lowercased text, reported cue).
    # The cue is a fixed label for the rule, not the substring that matched.
    rules = (
        ("non_implication", r"\bdoes not imply\b", "does not imply"),
        ("decoupling", r"\b(can|may)\s+occur\s+without\b|\bwithout leading to evolution\b", "without leading to evolution"),
        ("contrast", r"\bversus\b|\bvs\.?\b", "versus"),
        ("contrast", r"\brather than\b", "rather than"),
        ("contrast", r"\bdistinguish\b", "distinguish"),
        ("contrast", r"\bnot\b.+\bbut\b", "not ... but"),
        ("contrast", r"\bdoes not count as evolution\b", "does not count as evolution"),
    )
    haystack = claim_text.lower()
    first_hit = next(
        ((kind, cue) for kind, regex, cue in rules if re.search(regex, haystack)),
        None,
    )
    if first_hit is None:
        return None

    kind, cue = first_hit
    return {
        "claim_id": claim.get("claim_id", ""),
        "distinction_type": kind,
        "cue": cue,
        "text": claim_text,
    }
|
||||
|
||||
|
||||
def _as_text(value: Any) -> str:
    """Coerce a JSON field to a stripped string, mapping ``None`` to ``""``."""
    return "" if value is None else str(value).strip()


def rebuild_hub_bundle_from_binding(binding_path: str | Path) -> dict[str, Any]:
    """Rebuild the support layer of a hub bundle from its binding manifest.

    The manifest at *binding_path* names the hub query bundle and notebook
    page under ``primary_artifacts`` and, under ``supporting_artifacts``, the
    supporting bundles (only keys ending in ``_bundle`` are used). All paths
    are resolved relative to the manifest's directory.

    From the supporting bundles that exist on disk the function collects:

    * source artifacts, de-duplicated by ``artifact_id`` (a later bundle
      replaces an earlier entry with the same id);
    * up to two supporting observations per bundle, de-duplicated by text and
      capped at 12 overall;
    * a count of bundles per default source role;
    * up to six claim distinctions;
    * related concepts, merged with those already listed on the hub.

    The hub bundle file is rewritten in place with these fields (its previous
    ``source_artifacts`` and ``supporting_observations`` are replaced), and
    the notebook page is re-exported from the updated hub bundle.

    Args:
        binding_path: Path to the hub binding manifest (JSON).

    Returns:
        A summary dict with the resolved hub and page paths, the resulting
        counts, the source-role summary and the exporter's page summary.

    Raises:
        KeyError: If ``primary_artifacts`` or one of its two required entries
            is missing from the manifest.
    """
    binding_file = Path(binding_path)
    binding = _load_json(binding_file)
    base_dir = binding_file.parent
    hub_path = (base_dir / binding["primary_artifacts"]["groundrecall_query_bundle"]).resolve()
    page_path = (base_dir / binding["primary_artifacts"]["notebook_page"]).resolve()
    hub = _load_json(hub_path)
    # Computed once: it is loop-invariant, and a null "concept" must not crash.
    hub_concept_id = _as_text((hub.get("concept") or {}).get("concept_id"))

    support_entries: list[tuple[str, Path]] = [
        (key, (base_dir / rel).resolve())
        for key, rel in (binding.get("supporting_artifacts") or {}).items()
        if key.endswith("_bundle")
    ]

    artifact_by_id: dict[str, dict[str, Any]] = {}
    observation_rows: list[dict[str, Any]] = []
    related_by_id: dict[str, dict[str, Any]] = {}
    source_role_summary: Counter[str] = Counter()
    distinctions: list[dict[str, Any]] = []
    seen_obs_text: set[str] = set()

    for key, path in support_entries:
        if not path.exists():
            # Best effort: a bundle listed in the manifest but absent on disk
            # is skipped rather than treated as an error.
            continue
        payload = _load_json(path)
        concept = payload.get("concept") or {}
        concept_id = _as_text(concept.get("concept_id"))
        title = _as_text(concept.get("title"))
        role = _default_role(key, concept_id, title)
        source_role_summary[role] += 1

        # The hub never lists itself as a related concept.
        if concept_id and concept_id != hub_concept_id:
            related_by_id[concept_id] = {
                "id": concept_id,
                "label": title or concept_id.replace("concept::", "", 1).replace("-", " ").title(),
            }

        for artifact in payload.get("source_artifacts") or []:
            artifact_id = _as_text(artifact.get("artifact_id"))
            if not artifact_id:
                continue
            merged = dict(artifact)
            merged["source_role"] = merged.get("source_role") or role
            artifact_by_id[artifact_id] = merged

        # Only the first two observations of each bundle are considered; the
        # `or []` guard keeps a null field from raising on the slice.
        for obs in (payload.get("supporting_observations") or [])[:2]:
            text = _as_text(obs.get("text"))
            if not text or text in seen_obs_text:
                continue
            seen_obs_text.add(text)
            merged = dict(obs)
            # Fall back to the concept's first source artifact id, if any.
            merged["artifact_id"] = merged.get("artifact_id") or next(
                iter(concept.get("source_artifact_ids") or []), ""
            )
            merged["source_role"] = merged.get("source_role") or role
            observation_rows.append(merged)

        for claim in payload.get("relevant_claims") or []:
            distinction = _claim_distinction_payload(claim)
            if distinction is not None:
                distinctions.append(distinction)

    # Concepts already on the hub are kept, but entries derived from the
    # supporting bundles win when both share an id.
    for item in hub.get("related_concepts") or []:
        concept_id = _as_text(item.get("id") or item.get("concept_id"))
        label = _as_text(item.get("label") or item.get("title"))
        if concept_id:
            related_by_id.setdefault(concept_id, {"id": concept_id, "label": label})

    hub["source_artifacts"] = list(artifact_by_id.values())
    hub["supporting_observations"] = observation_rows[:12]
    hub["source_role_summary"] = dict(sorted(source_role_summary.items()))
    hub["key_distinctions"] = distinctions[:6]
    hub["related_concepts"] = list(related_by_id.values())
    notes = hub.get("bundle_notes") or []
    note = "Supporting source artifacts and source-role summaries were rebuilt deterministically from the hub binding manifest."
    if note not in notes:
        # Appended at most once so repeated rebuilds do not pile up notes.
        notes.append(note)
    hub["bundle_notes"] = notes
    hub_path.write_text(json.dumps(hub, indent=2), encoding="utf-8")

    page_summary = export_notebook_page_from_groundrecall_bundle(hub_path, page_path)
    return {
        "hub_bundle_path": str(hub_path),
        "notebook_page_path": str(page_path),
        "source_artifact_count": len(hub["source_artifacts"]),
        "supporting_observation_count": len(hub["supporting_observations"]),
        "source_role_summary": hub["source_role_summary"],
        "distinction_count": len(hub["key_distinctions"]),
        "page_summary": page_summary["page"]["summary"],
    }
|
||||
|
|
@ -10,6 +10,7 @@ from .groundrecall_pack_bridge import run_doclift_bundle_with_groundrecall
|
|||
from .augmentation_bundle_probe import write_probe_report
|
||||
from .archive_phrase_inventory import write_archive_phrase_inventory_report
|
||||
from .first_ring_batch_promotion import run_first_ring_batch_promotion
|
||||
from .hub_bundle_rebuild import rebuild_hub_bundle_from_binding
|
||||
from .notebook_page import export_notebook_page_from_groundrecall_bundle
|
||||
from .notebook_page import export_notebook_page_from_groundrecall_store
|
||||
from .review_loader import load_draft_pack
|
||||
|
|
@ -93,6 +94,12 @@ def build_parser() -> argparse.ArgumentParser:
|
|||
first_ring_parser.add_argument("manifest_path")
|
||||
first_ring_parser.add_argument("canonical_dir")
|
||||
first_ring_parser.add_argument("--output-dir")
|
||||
|
||||
hub_rebuild_parser = subparsers.add_parser(
|
||||
"hub-bundle-rebuild",
|
||||
help="Rebuild a hub bundle support layer from the bundle paths listed in a hub binding manifest",
|
||||
)
|
||||
hub_rebuild_parser.add_argument("binding_path")
|
||||
return parser
|
||||
|
||||
|
||||
|
|
@ -205,4 +212,8 @@ def main() -> None:
|
|||
)
|
||||
print(summary)
|
||||
return
|
||||
if args.command == "hub-bundle-rebuild":
|
||||
summary = rebuild_hub_bundle_from_binding(args.binding_path)
|
||||
print(summary)
|
||||
return
|
||||
build_parser().print_help()
|
||||
|
|
|
|||
|
|
@ -0,0 +1,90 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from didactopus.hub_bundle_rebuild import rebuild_hub_bundle_from_binding
|
||||
|
||||
|
||||
def test_rebuild_hub_bundle_from_binding_updates_support_layer(tmp_path: Path) -> None:
    """Rebuilding from a binding folds one supporting bundle into the hub.

    Lays out a hub bundle, a single supporting bundle, a notebook page and a
    binding manifest under ``tmp_path``, runs the rebuild, and checks the
    rewritten hub bundle's support-layer fields.
    """
    pilot_dir = tmp_path / "pilot"
    page_dir = pilot_dir / "didactopus" / "notebook-page"
    canonical_dir = pilot_dir / "groundrecall" / "export" / "canonical"
    for directory in (page_dir, canonical_dir):
        directory.mkdir(parents=True)

    hub_file = canonical_dir / "groundrecall_query_bundle__hub.json"
    support_file = canonical_dir / "query_bundle__natural-selection.json"
    page_file = page_dir / "notebook_page__hub.json"
    manifest_file = page_dir / "binding.json"

    # The same sentence serves as both the claim and the observation text.
    sentence = "Natural selection can occur without leading to evolution if traits are not inherited."

    hub_bundle = {
        "bundle_kind": "groundrecall_query_bundle",
        "concept": {
            "concept_id": "concept::hub",
            "title": "Hub",
            "aliases": [],
            "description": "Hub concept",
            "source_artifact_ids": ["ia_hub"],
            "current_status": "reviewed",
        },
        "relevant_claims": [{"claim_id": "hc1", "claim_text": "Hub claim."}],
        "relations": [],
        "supporting_observations": [],
        "source_artifacts": [],
        "related_concepts": [],
        "review_candidates": [],
        "suggested_next_actions": [],
        "bundle_notes": [],
    }
    support_bundle = {
        "bundle_kind": "groundrecall_query_bundle",
        "concept": {
            "concept_id": "concept::natural-selection",
            "title": "Natural Selection",
            "aliases": [],
            "description": "Support concept",
            "source_artifact_ids": ["ia_sel"],
            "current_status": "reviewed",
        },
        "relevant_claims": [{"claim_id": "c1", "claim_text": sentence}],
        "relations": [],
        "supporting_observations": [
            {
                "observation_id": "o1",
                "artifact_id": "ia_sel",
                "text": sentence,
                "role": "claim",
                "origin_path": "documents/selection/document.md",
                "grounding_status": "grounded",
            }
        ],
        "source_artifacts": [
            {
                "artifact_id": "ia_sel",
                "artifact_kind": "doclift_bundle_artifact",
                "title": "document",
                "path": "documents/selection/document.md",
                "current_status": "reviewed",
            }
        ],
        "related_concepts": [],
    }
    page_stub = {"concept": {"concept_id": "concept::hub"}, "summary": {}}
    # Manifest paths are relative to the manifest's own directory.
    manifest = {
        "primary_artifacts": {
            "groundrecall_query_bundle": "../../groundrecall/export/canonical/groundrecall_query_bundle__hub.json",
            "notebook_page": "./notebook_page__hub.json",
        },
        "supporting_artifacts": {
            "natural_selection_bundle": "../../groundrecall/export/canonical/query_bundle__natural-selection.json",
        },
    }

    fixtures = (
        (hub_file, hub_bundle),
        (support_file, support_bundle),
        (page_file, page_stub),
        (manifest_file, manifest),
    )
    for destination, document in fixtures:
        destination.write_text(json.dumps(document))

    result = rebuild_hub_bundle_from_binding(manifest_file)

    rebuilt = json.loads(hub_file.read_text())
    assert result["source_artifact_count"] == 1
    assert rebuilt["source_role_summary"]["mechanism"] == 1
    assert len(rebuilt["key_distinctions"]) == 1
    assert rebuilt["related_concepts"][0]["id"] == "concept::natural-selection"
|
||||
Loading…
Reference in New Issue