Add work-map Notebook refresh command
This commit is contained in:
parent
c6840ef067
commit
06ae1c60bd
|
|
@ -12,6 +12,7 @@ from .archive_phrase_inventory import write_archive_phrase_inventory_report
|
|||
from .first_ring_batch_promotion import run_first_ring_batch_promotion
|
||||
from .hub_bundle_rebuild import rebuild_hub_bundle_from_binding
|
||||
from .notebook_promotion_pipeline import run_notebook_promotion_pipeline
|
||||
from .notebook_workmap_refresh import run_notebook_workmap_refresh
|
||||
from .notebook_page import export_notebook_page_from_groundrecall_bundle
|
||||
from .notebook_page import export_notebook_page_from_groundrecall_store
|
||||
from .review_loader import load_draft_pack
|
||||
|
|
@ -114,6 +115,15 @@ def build_parser() -> argparse.ArgumentParser:
|
|||
pipeline_parser.add_argument("--phrase-input", action="append", default=[])
|
||||
pipeline_parser.add_argument("--seed-term", action="append", default=[])
|
||||
pipeline_parser.add_argument("--top-n", type=int, default=50)
|
||||
|
||||
workmap_parser = subparsers.add_parser(
|
||||
"notebook-workmap-refresh",
|
||||
help="Run the Notebook promotion pipeline from a project work-map so active paths do not need to be reassembled by hand",
|
||||
)
|
||||
workmap_parser.add_argument("work_map_path")
|
||||
workmap_parser.add_argument("--output-path")
|
||||
workmap_parser.add_argument("--phrase-inventory-output")
|
||||
workmap_parser.add_argument("--top-n", type=int, default=50)
|
||||
return parser
|
||||
|
||||
|
||||
|
|
@ -243,4 +253,13 @@ def main() -> None:
|
|||
)
|
||||
print(summary)
|
||||
return
|
||||
if args.command == "notebook-workmap-refresh":
|
||||
summary = run_notebook_workmap_refresh(
|
||||
args.work_map_path,
|
||||
output_path=args.output_path,
|
||||
phrase_inventory_output=args.phrase_inventory_output,
|
||||
top_n=args.top_n,
|
||||
)
|
||||
print(summary)
|
||||
return
|
||||
build_parser().print_help()
|
||||
|
|
|
|||
|
|
@ -0,0 +1,93 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .notebook_promotion_pipeline import run_notebook_promotion_pipeline
|
||||
|
||||
|
||||
def _load_json(path: Path) -> dict[str, Any]:
|
||||
return json.loads(path.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
def _resolve(base: Path, value: str) -> Path:
|
||||
path = Path(value)
|
||||
return path if path.is_absolute() else (base / path).resolve()
|
||||
|
||||
|
||||
def run_notebook_workmap_refresh(
|
||||
work_map_path: str | Path,
|
||||
*,
|
||||
output_path: str | Path | None = None,
|
||||
phrase_inventory_output: str | Path | None = None,
|
||||
top_n: int = 50,
|
||||
) -> dict[str, Any]:
|
||||
work_map_file = Path(work_map_path).resolve()
|
||||
work_root = work_map_file.parent.parent
|
||||
work_map = _load_json(work_map_file)
|
||||
|
||||
binding_path = _resolve(work_root, str(work_map["primary_hub"]["binding_path"]))
|
||||
canonical_dir = _resolve(work_root, str(work_map["groundrecall_paths"]["canonical_export_dir"]))
|
||||
manifest_path = _resolve(work_root, str(work_map["groundrecall_paths"]["batch_manifest"]))
|
||||
|
||||
report_path = (
|
||||
Path(output_path).resolve()
|
||||
if output_path
|
||||
else _resolve(work_root, str(work_map["groundrecall_paths"]["pipeline_report_json"]))
|
||||
)
|
||||
phrase_path = (
|
||||
Path(phrase_inventory_output).resolve()
|
||||
if phrase_inventory_output
|
||||
else _resolve(work_root, str(work_map["groundrecall_paths"]["pipeline_phrase_inventory_json"]))
|
||||
)
|
||||
|
||||
normalized_roots = [
|
||||
_resolve(work_root, item)
|
||||
for item in (work_map.get("canonical_sources", {}) or {}).get("normalized_roots", [])
|
||||
]
|
||||
|
||||
seed_terms = [
|
||||
str(item.get("concept", "")).strip()
|
||||
for items in (work_map.get("promotion_priority", {}) or {}).values()
|
||||
for item in items or []
|
||||
if str(item.get("concept", "")).strip()
|
||||
]
|
||||
|
||||
if not seed_terms:
|
||||
manifest_data = _load_json(manifest_path) if manifest_path.suffix == ".json" else None
|
||||
if manifest_data:
|
||||
seed_terms = [
|
||||
str(item.get("concept", "")).strip()
|
||||
for items in (manifest_data.get("promotion_priority", {}) or {}).values()
|
||||
for item in items or []
|
||||
if str(item.get("concept", "")).strip()
|
||||
]
|
||||
else:
|
||||
import yaml # local import to avoid unnecessary dependency at module import time
|
||||
|
||||
manifest_yaml = yaml.safe_load(manifest_path.read_text(encoding="utf-8")) or {}
|
||||
seed_terms = [
|
||||
str(item.get("concept", "")).strip()
|
||||
for items in (manifest_yaml.get("promotion_priority", {}) or {}).values()
|
||||
for item in items or []
|
||||
if str(item.get("concept", "")).strip()
|
||||
]
|
||||
|
||||
summary = run_notebook_promotion_pipeline(
|
||||
binding_path=binding_path,
|
||||
manifest_path=manifest_path,
|
||||
canonical_dir=canonical_dir,
|
||||
output_path=report_path,
|
||||
phrase_inventory_output=phrase_path,
|
||||
phrase_inputs=normalized_roots,
|
||||
seed_terms=seed_terms,
|
||||
top_n=top_n,
|
||||
)
|
||||
return {
|
||||
"work_map_path": str(work_map_file),
|
||||
"work_root": str(work_root),
|
||||
"report_path": summary["report_path"],
|
||||
"markdown_path": summary["markdown_path"],
|
||||
"report": summary["report"],
|
||||
}
|
||||
|
|
@ -0,0 +1,163 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from didactopus.notebook_workmap_refresh import run_notebook_workmap_refresh
|
||||
|
||||
|
||||
def test_notebook_workmap_refresh_runs_from_work_map(tmp_path: Path) -> None:
    """Drive the refresh from a work-map alone, with no explicit output overrides.

    Builds a throwaway project tree under ``tmp_path / "pilot"`` (normalized
    document, three canonical query bundles, a notebook binding, a YAML batch
    manifest and the work-map itself), runs the refresh, and checks the
    resolved root, the written report and the rebuilt placeholder bundle.
    """
    pilot = tmp_path / "pilot"
    docs_dir = pilot / "normalized" / "seed-bundle" / "documents" / "source-one"
    docs_dir.mkdir(parents=True)
    (docs_dir / "document.md").write_text(
        "# Source One\n\nEvolution is a change in the gene pool of a population over time.\n",
        encoding="utf-8",
    )

    export_dir = pilot / "groundrecall" / "export" / "canonical"
    export_dir.mkdir(parents=True)
    notebook_dir = pilot / "didactopus" / "notebook-page"
    notebook_dir.mkdir(parents=True)
    workmap_dir = pilot / ".groundrecall"
    workmap_dir.mkdir(parents=True)

    hub = {
        "bundle_kind": "groundrecall_query_bundle",
        "query_type": "concept",
        "concept": {
            "concept_id": "concept::hub",
            "title": "Hub",
            "aliases": [],
            "description": "Hub concept",
            "source_artifact_ids": ["ia_hub"],
            "current_status": "reviewed",
        },
        "relevant_claims": [{"claim_id": "hc1", "claim_text": "Hub claim."}],
        "relations": [],
        "supporting_observations": [],
        "source_artifacts": [],
        "related_concepts": [],
        "review_candidates": [],
        "suggested_next_actions": [],
        "bundle_notes": [],
    }
    source_bundle = {
        "bundle_kind": "groundrecall_query_bundle",
        "query_type": "concept",
        "concept": {
            "concept_id": "concept::source",
            "title": "Source Concept",
            "aliases": [],
            "description": "Source concept",
            "source_artifact_ids": ["ia_src"],
            "current_status": "reviewed",
        },
        "relevant_claims": [
            {
                "claim_id": "c1",
                "claim_text": "Evolution is a change in the gene pool of a population over time.",
                "source_observation_ids": ["o1"],
                "metadata": {},
            }
        ],
        "relations": [],
        "supporting_observations": [
            {
                "observation_id": "o1",
                "artifact_id": "ia_src",
                "text": "Evolution is a change in the gene pool of a population over time.",
                "role": "claim",
                "origin_path": "documents/source-one/document.md",
                "grounding_status": "grounded",
            }
        ],
        "source_artifacts": [
            {
                "artifact_id": "ia_src",
                "artifact_kind": "doclift_bundle_artifact",
                "title": "document",
                "path": "documents/source-one/document.md",
                "current_status": "reviewed",
            }
        ],
        "related_concepts": [],
    }
    # Starts with no claims; the final assertion checks that the refresh fills it.
    placeholder = {
        "bundle_kind": "groundrecall_query_bundle",
        "query_type": "concept",
        "concept": {
            "concept_id": "concept::gene-pool",
            "title": "Gene Pool",
            "aliases": [],
            "description": "Placeholder",
            "source_artifact_ids": [],
            "current_status": "reviewed",
        },
        "relevant_claims": [],
        "relations": [],
        "supporting_observations": [],
        "source_artifacts": [],
        "related_concepts": [],
        "review_candidates": [],
        "suggested_next_actions": [],
        "bundle_notes": [],
    }
    (export_dir / "groundrecall_query_bundle__hub.json").write_text(json.dumps(hub), encoding="utf-8")
    (export_dir / "query_bundle__source.json").write_text(json.dumps(source_bundle), encoding="utf-8")
    (export_dir / "query_bundle__gene-pool.json").write_text(json.dumps(placeholder), encoding="utf-8")
    (notebook_dir / "notebook_page__hub.json").write_text(
        json.dumps({"concept": {"concept_id": "concept::hub"}, "summary": {}}),
        encoding="utf-8",
    )

    binding = {
        "primary_artifacts": {
            "groundrecall_query_bundle": "../../groundrecall/export/canonical/groundrecall_query_bundle__hub.json",
            "notebook_page": "./notebook_page__hub.json",
        },
        "supporting_artifacts": {
            "gene_pool_bundle": "../../groundrecall/export/canonical/query_bundle__gene-pool.json",
        },
    }
    (notebook_dir / "binding.json").write_text(json.dumps(binding), encoding="utf-8")

    manifest = pilot / "manifests" / "first-ring-promotion-batch.yaml"
    manifest.parent.mkdir(parents=True)
    # NOTE(review): the YAML nesting below is reconstructed; in particular
    # ``keyword_phrases`` is assumed to sit under ``compose_from`` -- confirm
    # against the manifest schema used by the batch promotion step.
    manifest.write_text(
        """
promotion_priority:
  tier_3:
    - concept: gene-pool
      label: Gene Pool
      compose_from:
        bundle_refs:
          - query_bundle__source.json
        keyword_phrases:
          - gene pool
""",
        encoding="utf-8",
    )

    # The work-map carries no ``promotion_priority``, so seed terms have to come
    # from the YAML manifest.
    work_map = {
        "project": "pilot",
        "primary_hub": {
            "binding_path": "didactopus/notebook-page/binding.json",
        },
        "canonical_sources": {
            "normalized_roots": ["normalized/seed-bundle"],
        },
        "groundrecall_paths": {
            "canonical_export_dir": "groundrecall/export/canonical",
            "batch_manifest": "manifests/first-ring-promotion-batch.yaml",
            "pipeline_report_json": "groundrecall/report.json",
            "pipeline_phrase_inventory_json": "groundrecall/phrases.json",
        },
    }
    work_map_path = workmap_dir / "work-map.json"
    work_map_path.write_text(json.dumps(work_map), encoding="utf-8")

    result = run_notebook_workmap_refresh(work_map_path, top_n=10)

    # The project root is the directory two levels above the work-map file.
    assert result["work_map_path"] == str(work_map_path.resolve())
    assert result["work_root"] == str(pilot.resolve())

    report = json.loads(Path(result["report_path"]).read_text(encoding="utf-8"))
    rebuilt_bundle = json.loads((export_dir / "query_bundle__gene-pool.json").read_text(encoding="utf-8"))
    assert report["batch_promotion"]["weak_node_count"] == 1
    assert report["delta"]["hub"]["related_concept_count"] == 1
    assert len(rebuilt_bundle["relevant_claims"]) == 1
|
||||
Loading…
Reference in New Issue