Add hub bundle rebuild utility
This commit is contained in:
parent
ad46b9e667
commit
d94249e0a8
|
|
@ -0,0 +1,142 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from collections import Counter
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from .notebook_page import export_notebook_page_from_groundrecall_bundle
|
||||||
|
|
||||||
|
|
||||||
|
def _load_json(path: Path) -> dict[str, Any]:
    """Parse the UTF-8 encoded JSON file at ``path`` and return the result.

    The parsed value is returned as-is; the top-level JSON type is not
    validated here, although callers in this module treat it as a mapping.
    """
    with path.open("r", encoding="utf-8") as handle:
        return json.load(handle)
|
||||||
|
|
||||||
|
|
||||||
|
def _slugify(text: str) -> str:
    """Turn ``text`` into a lowercase, hyphen-separated slug.

    The input is trimmed and lowercased, every run of characters outside
    ``a-z`` / ``0-9`` collapses into a single hyphen, and hyphens left at
    either end are removed.
    """
    normalized = text.strip().lower()
    hyphenated = re.sub(r"[^a-z0-9]+", "-", normalized)
    return hyphenated.strip("-")
|
||||||
|
|
||||||
|
|
||||||
|
def _default_role(key: str, concept_id: str, title: str) -> str:
    """Pick a fallback source role from a support bundle's identifying strings.

    The non-empty values among ``key``, ``concept_id`` and ``title`` are
    joined with spaces and lowercased, then searched for marker substrings.
    Nuance markers are checked before mechanism markers, so a string that
    contains both kinds is classified as ``"nuance"``. When nothing matches
    the role is ``"overview"``.
    """
    nuance_markers = (
        "plasticity",
        "epigenetic",
        "adaptation",
        "neutral",
        "chance",
        "selection-and-evolution",
        "adaptationism",
    )
    mechanism_markers = (
        "selection",
        "drift",
        "mutation",
        "population-genetics",
        "speciation",
        "testing-natural-selection",
    )
    haystack = " ".join(filter(None, (key, concept_id, title))).lower()
    # Order matters: the first role with any matching marker wins.
    for role, markers in (("nuance", nuance_markers), ("mechanism", mechanism_markers)):
        for marker in markers:
            if marker in haystack:
                return role
    return "overview"
|
||||||
|
|
||||||
|
|
||||||
|
def _claim_distinction_payload(claim: dict[str, Any]) -> dict[str, Any] | None:
    """Classify a claim as a "distinction" based on cue phrases in its text.

    The claim's ``claim_text`` is trimmed and matched case-insensitively
    against an ordered table of cue patterns; the first pattern that matches
    decides the result.

    Returns:
        A dict with ``claim_id``, ``distinction_type``, ``cue`` and ``text``
        (the trimmed claim text in its original casing), or ``None`` when the
        text is empty or no cue pattern matches.
    """
    text = str(claim.get("claim_text", "")).strip()
    if not text:
        return None

    haystack = text.lower()
    # (distinction type, regex searched in the lowercased text, reported cue)
    cue_table = (
        ("non_implication", r"\bdoes not imply\b", "does not imply"),
        ("decoupling", r"\b(can|may)\s+occur\s+without\b|\bwithout leading to evolution\b", "without leading to evolution"),
        ("contrast", r"\bversus\b|\bvs\.?\b", "versus"),
        ("contrast", r"\brather than\b", "rather than"),
        ("contrast", r"\bdistinguish\b", "distinguish"),
        ("contrast", r"\bnot\b.+\bbut\b", "not ... but"),
        ("contrast", r"\bdoes not count as evolution\b", "does not count as evolution"),
    )
    first_hit = next(
        ((kind, cue) for kind, regex, cue in cue_table if re.search(regex, haystack)),
        None,
    )
    if first_hit is None:
        return None

    kind, cue = first_hit
    return {
        "claim_id": claim.get("claim_id", ""),
        "distinction_type": kind,
        "cue": cue,
        "text": text,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def rebuild_hub_bundle_from_binding(binding_path: str | Path) -> dict[str, Any]:
    """Rebuild a hub bundle's support layer from its binding manifest.

    The manifest's ``primary_artifacts`` entry names the hub query bundle and
    the notebook page. Every ``supporting_artifacts`` entry whose key ends in
    ``_bundle`` names a support bundle. All paths are resolved relative to
    the directory that contains the manifest.

    From the support bundles that exist on disk, the following hub fields are
    recomputed and the hub bundle file is overwritten in place:

    * ``source_artifacts`` - artifacts keyed by ``artifact_id`` (a later
      bundle replaces an earlier one with the same id), each given a
      ``source_role`` if it has none.
    * ``supporting_observations`` - at most two observations per support
      bundle, de-duplicated by text, capped at 12 overall.
    * ``source_role_summary`` - number of support bundles per role.
    * ``key_distinctions`` - the first six claims recognised by
      ``_claim_distinction_payload``.
    * ``related_concepts`` - support-bundle concepts other than the hub's
      own, followed by any related concepts the hub already listed.
    * ``bundle_notes`` - gains a note about the rebuild (added once).

    Afterwards ``export_notebook_page_from_groundrecall_bundle`` is called
    with the hub bundle path and the notebook page path.

    Args:
        binding_path: Location of the hub binding manifest (JSON).

    Returns:
        A summary dict with ``hub_bundle_path``, ``notebook_page_path``,
        ``source_artifact_count``, ``supporting_observation_count``,
        ``source_role_summary``, ``distinction_count`` and ``page_summary``.

    Raises:
        KeyError: If the manifest lacks ``primary_artifacts`` or one of its
            two required keys, or the exporter result lacks
            ``["page"]["summary"]``.
        OSError: If the manifest or the hub bundle cannot be read, or the
            hub bundle cannot be written.
    """
    binding_file = Path(binding_path)
    binding = _load_json(binding_file)
    base_dir = binding_file.parent
    hub_path = (base_dir / binding["primary_artifacts"]["groundrecall_query_bundle"]).resolve()
    page_path = (base_dir / binding["primary_artifacts"]["notebook_page"]).resolve()
    hub = _load_json(hub_path)

    # Loop-invariant, so computed once. ``or {}`` tolerates an explicit JSON
    # null for the hub concept instead of failing on ``None.get``.
    hub_concept_id = str((hub.get("concept") or {}).get("concept_id", "")).strip()

    support_map = binding.get("supporting_artifacts", {}) or {}
    support_entries: list[tuple[str, Path]] = [
        (key, (base_dir / rel).resolve())
        for key, rel in support_map.items()
        if key.endswith("_bundle")
    ]

    artifact_by_id: dict[str, dict[str, Any]] = {}
    observation_rows: list[dict[str, Any]] = []
    related_by_id: dict[str, dict[str, Any]] = {}
    source_role_summary: Counter[str] = Counter()
    distinctions: list[dict[str, Any]] = []
    seen_obs_text: set[str] = set()

    for key, path in support_entries:
        if not path.exists():
            # Best effort: a support bundle listed in the manifest but absent
            # on disk is skipped rather than treated as an error.
            continue
        payload = _load_json(path)
        concept = payload.get("concept", {}) or {}
        concept_id = str(concept.get("concept_id", "")).strip()
        title = str(concept.get("title", "")).strip()
        role = _default_role(key, concept_id, title)
        source_role_summary[role] += 1

        # The hub never lists itself as a related concept.
        if concept_id and concept_id != hub_concept_id:
            related_by_id[concept_id] = {
                "id": concept_id,
                "label": title or concept_id.replace("concept::", "", 1).replace("-", " ").title(),
            }

        for artifact in payload.get("source_artifacts", []) or []:
            artifact_id = str(artifact.get("artifact_id", "")).strip()
            if not artifact_id:
                continue
            merged = dict(artifact)
            # Keep an explicit role; otherwise fall back to the bundle's role.
            merged["source_role"] = merged.get("source_role") or role
            artifact_by_id[artifact_id] = merged

        # ``or []`` mirrors the neighbouring loops: a null value must not
        # reach the slice. Only the first two observations per bundle count.
        for obs in (payload.get("supporting_observations") or [])[:2]:
            text = str(obs.get("text", "")).strip()
            if not text or text in seen_obs_text:
                continue
            seen_obs_text.add(text)
            merged = dict(obs)
            # Without its own artifact id, an observation is attributed to
            # the concept's first source artifact (or "" if there is none).
            merged["artifact_id"] = merged.get("artifact_id") or next(
                iter(concept.get("source_artifact_ids", []) or []), ""
            )
            merged["source_role"] = merged.get("source_role") or role
            observation_rows.append(merged)

        for claim in payload.get("relevant_claims", []) or []:
            distinction = _claim_distinction_payload(claim)
            if distinction is not None:
                distinctions.append(distinction)

    # Related concepts already on the hub are kept, but entries derived from
    # support bundles win when both describe the same concept id.
    for item in hub.get("related_concepts", []) or []:
        concept_id = str(item.get("id", "") or item.get("concept_id", "")).strip()
        label = str(item.get("label", "") or item.get("title", "")).strip()
        if concept_id:
            related_by_id.setdefault(concept_id, {"id": concept_id, "label": label})

    hub["source_artifacts"] = list(artifact_by_id.values())
    hub["supporting_observations"] = observation_rows[:12]
    hub["source_role_summary"] = dict(sorted(source_role_summary.items()))
    hub["key_distinctions"] = distinctions[:6]
    hub["related_concepts"] = list(related_by_id.values())

    notes = hub.get("bundle_notes", []) or []
    note = "Supporting source artifacts and source-role summaries were rebuilt deterministically from the hub binding manifest."
    if note not in notes:
        # Guarded so that repeated rebuilds do not stack identical notes.
        notes.append(note)
    hub["bundle_notes"] = notes
    hub_path.write_text(json.dumps(hub, indent=2), encoding="utf-8")

    page_summary = export_notebook_page_from_groundrecall_bundle(hub_path, page_path)
    return {
        "hub_bundle_path": str(hub_path),
        "notebook_page_path": str(page_path),
        "source_artifact_count": len(hub["source_artifacts"]),
        "supporting_observation_count": len(hub["supporting_observations"]),
        "source_role_summary": hub["source_role_summary"],
        "distinction_count": len(hub["key_distinctions"]),
        "page_summary": page_summary["page"]["summary"],
    }
|
||||||
|
|
@ -10,6 +10,7 @@ from .groundrecall_pack_bridge import run_doclift_bundle_with_groundrecall
|
||||||
from .augmentation_bundle_probe import write_probe_report
|
from .augmentation_bundle_probe import write_probe_report
|
||||||
from .archive_phrase_inventory import write_archive_phrase_inventory_report
|
from .archive_phrase_inventory import write_archive_phrase_inventory_report
|
||||||
from .first_ring_batch_promotion import run_first_ring_batch_promotion
|
from .first_ring_batch_promotion import run_first_ring_batch_promotion
|
||||||
|
from .hub_bundle_rebuild import rebuild_hub_bundle_from_binding
|
||||||
from .notebook_page import export_notebook_page_from_groundrecall_bundle
|
from .notebook_page import export_notebook_page_from_groundrecall_bundle
|
||||||
from .notebook_page import export_notebook_page_from_groundrecall_store
|
from .notebook_page import export_notebook_page_from_groundrecall_store
|
||||||
from .review_loader import load_draft_pack
|
from .review_loader import load_draft_pack
|
||||||
|
|
@ -93,6 +94,12 @@ def build_parser() -> argparse.ArgumentParser:
|
||||||
first_ring_parser.add_argument("manifest_path")
|
first_ring_parser.add_argument("manifest_path")
|
||||||
first_ring_parser.add_argument("canonical_dir")
|
first_ring_parser.add_argument("canonical_dir")
|
||||||
first_ring_parser.add_argument("--output-dir")
|
first_ring_parser.add_argument("--output-dir")
|
||||||
|
|
||||||
|
hub_rebuild_parser = subparsers.add_parser(
|
||||||
|
"hub-bundle-rebuild",
|
||||||
|
help="Rebuild a hub bundle support layer from the bundle paths listed in a hub binding manifest",
|
||||||
|
)
|
||||||
|
hub_rebuild_parser.add_argument("binding_path")
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -205,4 +212,8 @@ def main() -> None:
|
||||||
)
|
)
|
||||||
print(summary)
|
print(summary)
|
||||||
return
|
return
|
||||||
|
if args.command == "hub-bundle-rebuild":
|
||||||
|
summary = rebuild_hub_bundle_from_binding(args.binding_path)
|
||||||
|
print(summary)
|
||||||
|
return
|
||||||
build_parser().print_help()
|
build_parser().print_help()
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,90 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from didactopus.hub_bundle_rebuild import rebuild_hub_bundle_from_binding
|
||||||
|
|
||||||
|
|
||||||
|
def test_rebuild_hub_bundle_from_binding_updates_support_layer(tmp_path: Path) -> None:
    """Rebuilding from a binding with one support bundle updates the hub file.

    Lays out a hub bundle, one natural-selection support bundle, a notebook
    page stub and a binding manifest under ``tmp_path``, runs the rebuild,
    and checks the returned artifact count plus the role summary, key
    distinctions and related concepts written back to the hub bundle.
    """
    pilot_dir = tmp_path / "pilot"
    page_dir = pilot_dir / "didactopus" / "notebook-page"
    canonical_dir = pilot_dir / "groundrecall" / "export" / "canonical"
    for directory in (page_dir, canonical_dir):
        directory.mkdir(parents=True)

    hub_bundle_file = canonical_dir / "groundrecall_query_bundle__hub.json"
    support_bundle_file = canonical_dir / "query_bundle__natural-selection.json"
    page_file = page_dir / "notebook_page__hub.json"
    manifest_file = page_dir / "binding.json"

    # The same sentence serves as the support claim and as its observation;
    # it contains the "can occur without" cue that marks a distinction.
    selection_sentence = "Natural selection can occur without leading to evolution if traits are not inherited."

    hub_bundle = {
        "bundle_kind": "groundrecall_query_bundle",
        "concept": {
            "concept_id": "concept::hub",
            "title": "Hub",
            "aliases": [],
            "description": "Hub concept",
            "source_artifact_ids": ["ia_hub"],
            "current_status": "reviewed",
        },
        "relevant_claims": [{"claim_id": "hc1", "claim_text": "Hub claim."}],
        "relations": [],
        "supporting_observations": [],
        "source_artifacts": [],
        "related_concepts": [],
        "review_candidates": [],
        "suggested_next_actions": [],
        "bundle_notes": [],
    }
    support_bundle = {
        "bundle_kind": "groundrecall_query_bundle",
        "concept": {
            "concept_id": "concept::natural-selection",
            "title": "Natural Selection",
            "aliases": [],
            "description": "Support concept",
            "source_artifact_ids": ["ia_sel"],
            "current_status": "reviewed",
        },
        "relevant_claims": [{"claim_id": "c1", "claim_text": selection_sentence}],
        "relations": [],
        "supporting_observations": [
            {
                "observation_id": "o1",
                "artifact_id": "ia_sel",
                "text": selection_sentence,
                "role": "claim",
                "origin_path": "documents/selection/document.md",
                "grounding_status": "grounded",
            }
        ],
        "source_artifacts": [
            {
                "artifact_id": "ia_sel",
                "artifact_kind": "doclift_bundle_artifact",
                "title": "document",
                "path": "documents/selection/document.md",
                "current_status": "reviewed",
            }
        ],
        "related_concepts": [],
    }
    page_stub = {"concept": {"concept_id": "concept::hub"}, "summary": {}}
    manifest = {
        "primary_artifacts": {
            "groundrecall_query_bundle": "../../groundrecall/export/canonical/groundrecall_query_bundle__hub.json",
            "notebook_page": "./notebook_page__hub.json",
        },
        "supporting_artifacts": {
            "natural_selection_bundle": "../../groundrecall/export/canonical/query_bundle__natural-selection.json",
        },
    }

    hub_bundle_file.write_text(json.dumps(hub_bundle))
    support_bundle_file.write_text(json.dumps(support_bundle))
    page_file.write_text(json.dumps(page_stub))
    manifest_file.write_text(json.dumps(manifest))

    summary = rebuild_hub_bundle_from_binding(manifest_file)

    hub_after = json.loads(hub_bundle_file.read_text())
    assert summary["source_artifact_count"] == 1
    assert hub_after["source_role_summary"]["mechanism"] == 1
    assert len(hub_after["key_distinctions"]) == 1
    assert hub_after["related_concepts"][0]["id"] == "concept::natural-selection"
|
||||||
Loading…
Reference in New Issue