From 06ae1c60bd3b01178e055b2a0891b190264da4e8 Mon Sep 17 00:00:00 2001
From: welsberr
Date: Mon, 11 May 2026 09:18:20 -0400
Subject: [PATCH] Add work-map Notebook refresh command

---
 src/didactopus/main.py                     |  19 +++
 src/didactopus/notebook_workmap_refresh.py | 124 ++++++++++++++++
 tests/test_notebook_workmap_refresh.py     | 163 +++++++++++++++++++++
 3 files changed, 306 insertions(+)
 create mode 100644 src/didactopus/notebook_workmap_refresh.py
 create mode 100644 tests/test_notebook_workmap_refresh.py

diff --git a/src/didactopus/main.py b/src/didactopus/main.py
index c53e3a6..35ea70e 100644
--- a/src/didactopus/main.py
+++ b/src/didactopus/main.py
@@ -12,6 +12,7 @@ from .archive_phrase_inventory import write_archive_phrase_inventory_report
 from .first_ring_batch_promotion import run_first_ring_batch_promotion
 from .hub_bundle_rebuild import rebuild_hub_bundle_from_binding
 from .notebook_promotion_pipeline import run_notebook_promotion_pipeline
+from .notebook_workmap_refresh import run_notebook_workmap_refresh
 from .notebook_page import export_notebook_page_from_groundrecall_bundle
 from .notebook_page import export_notebook_page_from_groundrecall_store
 from .review_loader import load_draft_pack
@@ -114,6 +115,15 @@ def build_parser() -> argparse.ArgumentParser:
     pipeline_parser.add_argument("--phrase-input", action="append", default=[])
     pipeline_parser.add_argument("--seed-term", action="append", default=[])
     pipeline_parser.add_argument("--top-n", type=int, default=50)
+
+    workmap_parser = subparsers.add_parser(
+        "notebook-workmap-refresh",
+        help="Run the Notebook promotion pipeline from a project work-map so active paths do not need to be reassembled by hand",
+    )
+    workmap_parser.add_argument("work_map_path")
+    workmap_parser.add_argument("--output-path")
+    workmap_parser.add_argument("--phrase-inventory-output")
+    workmap_parser.add_argument("--top-n", type=int, default=50)
     return parser
 
 
@@ -243,4 +253,13 @@ def main() -> None:
         )
         print(summary)
         return
+    if args.command == "notebook-workmap-refresh":
+        summary = run_notebook_workmap_refresh(
+            args.work_map_path,
+            output_path=args.output_path,
+            phrase_inventory_output=args.phrase_inventory_output,
+            top_n=args.top_n,
+        )
+        print(summary)
+        return
     build_parser().print_help()
diff --git a/src/didactopus/notebook_workmap_refresh.py b/src/didactopus/notebook_workmap_refresh.py
new file mode 100644
index 0000000..46f08ad
--- /dev/null
+++ b/src/didactopus/notebook_workmap_refresh.py
@@ -0,0 +1,124 @@
+"""Run the Notebook promotion pipeline from the paths in a project work-map."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+from .notebook_promotion_pipeline import run_notebook_promotion_pipeline
+
+
+def _load_json(path: Path) -> dict[str, Any]:
+    """Read *path* as UTF-8 text and parse it as JSON."""
+    return json.loads(path.read_text(encoding="utf-8"))
+
+
+def _load_manifest(path: Path) -> Any:
+    """Parse the batch manifest: JSON for ``.json`` files, YAML otherwise."""
+    if path.suffix == ".json":
+        return _load_json(path)
+
+    import yaml  # local import to avoid unnecessary dependency at module import time
+
+    return yaml.safe_load(path.read_text(encoding="utf-8"))
+
+
+def _resolve(base: Path, value: str) -> Path:
+    """Return *value* as-is when absolute, otherwise resolved against *base*."""
+    path = Path(value)
+    return path if path.is_absolute() else (base / path).resolve()
+
+
+def _collect_seed_terms(mapping: Any) -> list[str]:
+    """Return the non-empty ``concept`` names under ``promotion_priority``."""
+    if not isinstance(mapping, dict):
+        return []
+    tiers = mapping.get("promotion_priority") or {}
+    terms = []
+    for items in tiers.values():
+        for item in items or []:
+            # ``or ""`` keeps a null concept from becoming the term "None".
+            concept = str(item.get("concept") or "").strip()
+            if concept:
+                terms.append(concept)
+    return terms
+
+
+def run_notebook_workmap_refresh(
+    work_map_path: str | Path,
+    *,
+    output_path: str | Path | None = None,
+    phrase_inventory_output: str | Path | None = None,
+    top_n: int = 50,
+) -> dict[str, Any]:
+    """Run the Notebook promotion pipeline using the paths in a work-map.
+
+    Relative work-map paths are resolved against the parent of the directory
+    holding the work-map file.  Seed terms come from the work-map's
+    ``promotion_priority`` section, falling back to the batch manifest's when
+    the work-map lists none.
+
+    Args:
+        work_map_path: JSON work-map file.
+        output_path: Overrides ``groundrecall_paths.pipeline_report_json``.
+        phrase_inventory_output: Overrides
+            ``groundrecall_paths.pipeline_phrase_inventory_json``.
+        top_n: Passed through to the promotion pipeline.
+
+    Returns:
+        The resolved work-map path and work root, plus the pipeline's
+        ``report_path``, ``markdown_path`` and ``report``.
+
+    Raises:
+        KeyError: If a required work-map entry is missing.
+    """
+    work_map_file = Path(work_map_path).resolve()
+    work_root = work_map_file.parent.parent
+    work_map = _load_json(work_map_file)
+
+    binding_path = _resolve(work_root, str(work_map["primary_hub"]["binding_path"]))
+    paths = work_map["groundrecall_paths"]
+    canonical_dir = _resolve(work_root, str(paths["canonical_export_dir"]))
+    manifest_path = _resolve(work_root, str(paths["batch_manifest"]))
+
+    # Explicit overrides win; the work-map defaults are only read without one.
+    report_path = (
+        Path(output_path).resolve()
+        if output_path
+        else _resolve(work_root, str(paths["pipeline_report_json"]))
+    )
+    phrase_path = (
+        Path(phrase_inventory_output).resolve()
+        if phrase_inventory_output
+        else _resolve(work_root, str(paths["pipeline_phrase_inventory_json"]))
+    )
+
+    sources = work_map.get("canonical_sources") or {}
+    normalized_roots = [
+        _resolve(work_root, str(item))
+        for item in sources.get("normalized_roots") or []
+    ]
+
+    seed_terms = _collect_seed_terms(work_map)
+    if not seed_terms:
+        # The work-map lists no priorities, so use the batch manifest's.
+        seed_terms = _collect_seed_terms(_load_manifest(manifest_path))
+
+    summary = run_notebook_promotion_pipeline(
+        binding_path=binding_path,
+        manifest_path=manifest_path,
+        canonical_dir=canonical_dir,
+        output_path=report_path,
+        phrase_inventory_output=phrase_path,
+        phrase_inputs=normalized_roots,
+        seed_terms=seed_terms,
+        top_n=top_n,
+    )
+    return {
+        "work_map_path": str(work_map_file),
+        "work_root": str(work_root),
+        "report_path": summary["report_path"],
+        "markdown_path": summary["markdown_path"],
+        "report": summary["report"],
+    }
diff --git a/tests/test_notebook_workmap_refresh.py b/tests/test_notebook_workmap_refresh.py
new file mode 100644
index 0000000..b765a29
--- /dev/null
+++ b/tests/test_notebook_workmap_refresh.py
@@ -0,0 +1,163 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from didactopus.notebook_workmap_refresh import run_notebook_workmap_refresh
+
+
+def test_notebook_workmap_refresh_runs_from_work_map(tmp_path: Path) -> None:
+    pilot = tmp_path / "pilot"
+    docs_dir = pilot / "normalized" / "seed-bundle" / "documents" / "source-one"
+    docs_dir.mkdir(parents=True)
+    (docs_dir / "document.md").write_text(
+        "# Source One\n\nEvolution is a change in the gene pool of a population over time.\n",
+        encoding="utf-8",
+    )
+
+    export_dir = pilot / "groundrecall" / "export" / "canonical"
+    export_dir.mkdir(parents=True)
+    notebook_dir = pilot / "didactopus" / "notebook-page"
+    notebook_dir.mkdir(parents=True)
+    workmap_dir = pilot / ".groundrecall"
+    workmap_dir.mkdir(parents=True)
+
+    hub = {
+        "bundle_kind": "groundrecall_query_bundle",
+        "query_type": "concept",
+        "concept": {
+            "concept_id": "concept::hub",
+            "title": "Hub",
+            "aliases": [],
+            "description": "Hub concept",
+            "source_artifact_ids": ["ia_hub"],
+            "current_status": "reviewed",
+        },
+        "relevant_claims": [{"claim_id": "hc1", "claim_text": "Hub claim."}],
+        "relations": [],
+        "supporting_observations": [],
+        "source_artifacts": [],
+        "related_concepts": [],
+        "review_candidates": [],
+        "suggested_next_actions": [],
+        "bundle_notes": [],
+    }
+    source_bundle = {
+        "bundle_kind": "groundrecall_query_bundle",
+        "query_type": "concept",
+        "concept": {
+            "concept_id": "concept::source",
+            "title": "Source Concept",
+            "aliases": [],
+            "description": "Source concept",
+            "source_artifact_ids": ["ia_src"],
+            "current_status": "reviewed",
+        },
+        "relevant_claims": [
+            {
+                "claim_id": "c1",
+                "claim_text": "Evolution is a change in the gene pool of a population over time.",
+                "source_observation_ids": ["o1"],
+                "metadata": {},
+            }
+        ],
+        "relations": [],
+        "supporting_observations": [
+            {
+                "observation_id": "o1",
+                "artifact_id": "ia_src",
+                "text": "Evolution is a change in the gene pool of a population over time.",
+                "role": "claim",
+                "origin_path": "documents/source-one/document.md",
+                "grounding_status": "grounded",
+            }
+        ],
+        "source_artifacts": [
+            {
+                "artifact_id": "ia_src",
+                "artifact_kind": "doclift_bundle_artifact",
+                "title": "document",
+                "path": "documents/source-one/document.md",
+                "current_status": "reviewed",
+            }
+        ],
+        "related_concepts": [],
+    }
+    placeholder = {
+        "bundle_kind": "groundrecall_query_bundle",
+        "query_type": "concept",
+        "concept": {
+            "concept_id": "concept::gene-pool",
+            "title": "Gene Pool",
+            "aliases": [],
+            "description": "Placeholder",
+            "source_artifact_ids": [],
+            "current_status": "reviewed",
+        },
+        "relevant_claims": [],
+        "relations": [],
+        "supporting_observations": [],
+        "source_artifacts": [],
+        "related_concepts": [],
+        "review_candidates": [],
+        "suggested_next_actions": [],
+        "bundle_notes": [],
+    }
+    (export_dir / "groundrecall_query_bundle__hub.json").write_text(json.dumps(hub), encoding="utf-8")
+    (export_dir / "query_bundle__source.json").write_text(json.dumps(source_bundle), encoding="utf-8")
+    (export_dir / "query_bundle__gene-pool.json").write_text(json.dumps(placeholder), encoding="utf-8")
+    (notebook_dir / "notebook_page__hub.json").write_text(json.dumps({"concept": {"concept_id": "concept::hub"}, "summary": {}}), encoding="utf-8")
+
+    binding = {
+        "primary_artifacts": {
+            "groundrecall_query_bundle": "../../groundrecall/export/canonical/groundrecall_query_bundle__hub.json",
+            "notebook_page": "./notebook_page__hub.json",
+        },
+        "supporting_artifacts": {
+            "gene_pool_bundle": "../../groundrecall/export/canonical/query_bundle__gene-pool.json",
+        },
+    }
+    (notebook_dir / "binding.json").write_text(json.dumps(binding), encoding="utf-8")
+
+    manifest = pilot / "manifests" / "first-ring-promotion-batch.yaml"
+    manifest.parent.mkdir(parents=True)
+    manifest.write_text(
+        """
+promotion_priority:
+  tier_3:
+    - concept: gene-pool
+      label: Gene Pool
+      compose_from:
+        bundle_refs:
+          - query_bundle__source.json
+        keyword_phrases:
+          - gene pool
+""",
+        encoding="utf-8",
+    )
+
+    work_map = {
+        "project": "pilot",
+        "primary_hub": {
+            "binding_path": "didactopus/notebook-page/binding.json",
+        },
+        "canonical_sources": {
+            "normalized_roots": ["normalized/seed-bundle"],
+        },
+        "groundrecall_paths": {
+            "canonical_export_dir": "groundrecall/export/canonical",
+            "batch_manifest": "manifests/first-ring-promotion-batch.yaml",
+            "pipeline_report_json": "groundrecall/report.json",
+            "pipeline_phrase_inventory_json": "groundrecall/phrases.json",
+        },
+    }
+    work_map_path = workmap_dir / "work-map.json"
+    work_map_path.write_text(json.dumps(work_map), encoding="utf-8")
+
+    result = run_notebook_workmap_refresh(work_map_path, top_n=10)
+
+    report = json.loads(Path(result["report_path"]).read_text(encoding="utf-8"))
+    rebuilt_bundle = json.loads((export_dir / "query_bundle__gene-pool.json").read_text(encoding="utf-8"))
+    assert report["batch_promotion"]["weak_node_count"] == 1
+    assert report["delta"]["hub"]["related_concept_count"] == 1
+    assert len(rebuilt_bundle["relevant_claims"]) == 1