Add Notebook augmentation comparison utility

This commit is contained in:
welsberr 2026-05-08 21:27:27 -04:00
parent 6e660187f6
commit e3a04b6742
2 changed files with 225 additions and 0 deletions

View File

@@ -0,0 +1,122 @@
from __future__ import annotations
import argparse
import json
from pathlib import Path
from typing import Any
from .pack_to_frontend import convert_pack
def _load_json(path: Path) -> dict[str, Any]:
    """Read a JSON object from *path*, falling back to an empty dict.

    Args:
        path: Location of the JSON file to read (decoded as UTF-8).

    Returns:
        The decoded top-level JSON object. An empty dict is returned when
        the file does not exist, or when its top-level value is not an
        object (for example ``null`` or a list), so callers can always call
        ``.get`` on the result.

    Raises:
        json.JSONDecodeError: If the file exists but does not hold valid JSON.
    """
    if not path.exists():
        return {}
    try:
        text = path.read_text(encoding="utf-8")
    except FileNotFoundError:
        # The file vanished between the existence check and the read;
        # treat that exactly like a file that was never there.
        return {}
    payload = json.loads(text)
    # The annotated contract is "a dict"; anything else would break callers.
    return payload if isinstance(payload, dict) else {}
def notebook_summary(pack_dir: str | Path) -> dict[str, Any]:
    """Return the ``notebook`` section of the converted pack at *pack_dir*.

    The pack is run through ``convert_pack``; a missing or falsy
    ``notebook`` entry yields an empty dict.
    """
    notebook = convert_pack(pack_dir).get("notebook", {})
    if not notebook:
        return {}
    return notebook
def compare_notebook_packs(
    baseline_pack_dir: str | Path,
    augmented_pack_dir: str | Path,
) -> dict[str, Any]:
    """Compare the notebook output of a baseline pack and an augmented pack.

    Both packs are passed through ``convert_pack`` and their ``notebook``
    sections are compared. The augmented pack's
    ``groundrecall_query_bundle.json`` and ``notebook_page.json`` are read
    through ``_load_json`` to add an excerpt and a page summary.

    Args:
        baseline_pack_dir: Directory of the pack before augmentation.
        augmented_pack_dir: Directory of the pack after augmentation.

    Returns:
        A dict with these keys:

        * ``baseline_notebook`` / ``augmented_notebook``: the ``notebook``
          section of each converted pack (``{}`` when absent or falsy).
        * ``delta``: augmented-minus-baseline differences for
          ``claimCount``, ``distinctionCount``,
          ``supportingObservationCount`` and ``relatedConceptCount``, plus
          ``sourceRoleKeys``: the sorted ``sourceRoleSummary`` keys found
          only in the augmented notebook.
        * ``bundle_excerpt``: claim ids, the source-role summary and at
          most eight key distinctions from the augmented query bundle.
        * ``page_summary``: the ``summary`` entry of the augmented
          notebook page (``{}`` when absent).
    """
    baseline_pack_dir = Path(baseline_pack_dir)
    augmented_pack_dir = Path(augmented_pack_dir)

    baseline_notebook = convert_pack(baseline_pack_dir).get("notebook", {}) or {}
    augmented_notebook = convert_pack(augmented_pack_dir).get("notebook", {}) or {}
    augmented_bundle = _load_json(augmented_pack_dir / "groundrecall_query_bundle.json")
    augmented_page = _load_json(augmented_pack_dir / "notebook_page.json")

    def count(notebook: dict[str, Any], key: str) -> int:
        # A missing or null count is treated as zero instead of raising.
        return int(notebook.get(key) or 0)

    count_keys = (
        "claimCount",
        "distinctionCount",
        "supportingObservationCount",
        "relatedConceptCount",
    )
    delta: dict[str, Any] = {
        key: count(augmented_notebook, key) - count(baseline_notebook, key)
        for key in count_keys
    }
    baseline_roles = baseline_notebook.get("sourceRoleSummary", {}) or {}
    augmented_roles = augmented_notebook.get("sourceRoleSummary", {}) or {}
    delta["sourceRoleKeys"] = sorted(set(augmented_roles) - set(baseline_roles))

    # Guard against an explicit null, matching the key_distinctions handling.
    relevant_claims = augmented_bundle.get("relevant_claims", []) or []
    key_distinctions = augmented_bundle.get("key_distinctions", []) or []

    return {
        "baseline_notebook": baseline_notebook,
        "augmented_notebook": augmented_notebook,
        "delta": delta,
        "bundle_excerpt": {
            "claim_ids": [item.get("claim_id") for item in relevant_claims],
            "source_role_summary": augmented_bundle.get("source_role_summary", {}),
            "key_distinctions": key_distinctions[:8],
        },
        "page_summary": augmented_page.get("summary", {}),
    }
def comparison_markdown(title: str, comparison: dict[str, Any]) -> str:
    """Render *comparison* as a Markdown report headed by *title*.

    The report has three sections: ``Baseline`` and ``Augmented`` list the
    claim count, source-role summary, distinction count and supporting
    observation count of each notebook; ``Delta`` lists the signed count
    differences and the new source-role keys. Missing entries fall back to
    ``0``, ``{}`` or ``[]``. The returned text ends with a newline.
    """
    before = comparison.get("baseline_notebook", {}) or {}
    after = comparison.get("augmented_notebook", {}) or {}
    change = comparison.get("delta", {}) or {}

    def metric_lines(notebook: dict[str, Any]) -> list[str]:
        # Both snapshot sections share the same four bullet lines.
        return [
            f"- claimCount: {notebook.get('claimCount', 0)}",
            f"- sourceRoleSummary: {notebook.get('sourceRoleSummary', {})}",
            f"- distinctionCount: {notebook.get('distinctionCount', 0)}",
            f"- supportingObservationCount: {notebook.get('supportingObservationCount', 0)}",
        ]

    report = [f"# {title}", ""]
    report += ["## Baseline", *metric_lines(before), ""]
    report += ["## Augmented", *metric_lines(after), ""]
    report += [
        "## Delta",
        # "+d" forces an explicit sign on the integer differences.
        f"- claimCount: {change.get('claimCount', 0):+d}",
        f"- distinctionCount: {change.get('distinctionCount', 0):+d}",
        f"- supportingObservationCount: {change.get('supportingObservationCount', 0):+d}",
        f"- relatedConceptCount: {change.get('relatedConceptCount', 0):+d}",
        f"- new source role keys: {change.get('sourceRoleKeys', [])}",
    ]
    return "".join(line + "\n" for line in report)
def write_notebook_comparison_report(
    baseline_pack_dir: str | Path,
    augmented_pack_dir: str | Path,
    outdir: str | Path,
    title: str = "Notebook Augmentation Comparison",
) -> dict[str, Any]:
    """Write the comparison report files into *outdir* and return the comparison.

    *outdir* is created (including parents) when it does not exist. Three
    UTF-8 files are written there:

    * ``comparison.json`` - the comparison dict as indented JSON.
    * ``comparison.md`` - the Markdown rendering headed by *title*.
    * ``frontend_pack_with_notebook.json`` - the ``convert_pack`` output
      for the augmented pack as indented JSON.

    Returns:
        The dict produced by ``compare_notebook_packs``.
    """
    report_dir = Path(outdir)
    report_dir.mkdir(parents=True, exist_ok=True)

    def emit(filename: str, text: str) -> None:
        (report_dir / filename).write_text(text, encoding="utf-8")

    comparison = compare_notebook_packs(baseline_pack_dir, augmented_pack_dir)
    emit("comparison.json", json.dumps(comparison, indent=2))
    emit("comparison.md", comparison_markdown(title, comparison))
    emit(
        "frontend_pack_with_notebook.json",
        json.dumps(convert_pack(augmented_pack_dir), indent=2),
    )
    return comparison
def main() -> None:
    """Command-line entry point: write the report files and print the comparison.

    Takes two positional arguments (baseline and augmented pack
    directories), a required ``--outdir`` and an optional ``--title``. The
    comparison is printed to stdout as indented JSON.
    """
    cli = argparse.ArgumentParser(description="Compare baseline and augmented Notebook pack outputs.")
    for positional in ("baseline_pack_dir", "augmented_pack_dir"):
        cli.add_argument(positional)
    cli.add_argument("--outdir", required=True)
    cli.add_argument("--title", default="Notebook Augmentation Comparison")
    options = cli.parse_args()

    result = write_notebook_comparison_report(
        options.baseline_pack_dir,
        options.augmented_pack_dir,
        options.outdir,
        title=options.title,
    )
    print(json.dumps(result, indent=2))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,103 @@
from pathlib import Path
import json
from didactopus.notebook_augmentation_experiment import compare_notebook_packs, write_notebook_comparison_report
def _write_pack(
    root: Path,
    *,
    claim_count: int,
    source_role_summary: dict,
    distinction_count: int,
    supporting_observation_count: int,
    related_concept_count: int,
) -> None:
    """Create a minimal demo pack under *root* for the comparison tests.

    Writes ``pack.yaml``, ``concepts.yaml``,
    ``groundrecall_query_bundle.json`` and ``notebook_page.json``. The
    bundle holds *claim_count* claims with ids ``c1``..``cN`` and
    *distinction_count* ``contrast`` distinctions; the page carries the two
    summary counts plus the same source-role summary and distinctions.
    """
    root.mkdir(parents=True, exist_ok=True)

    def put(filename: str, text: str) -> None:
        (root / filename).write_text(text, encoding="utf-8")

    put("pack.yaml", "name: demo-pack\ndisplay_name: Demo Pack\ndescription: Demo\n")
    # NOTE(review): the YAML indentation below is reproduced exactly as it
    # appears in this view - confirm it matches the repository copy.
    put(
        "concepts.yaml",
        "concepts:\n - id: thermo\n title: Thermodynamics and Entropy\n prerequisites: []\n",
    )

    concept = {"concept_id": "concept::thermo", "title": "Thermodynamics and Entropy"}
    claims = [{"claim_id": f"c{index+1}"} for index in range(claim_count)]
    contrasts = [{"distinction_type": "contrast"} for _ in range(distinction_count)]

    bundle = {
        "bundle_kind": "groundrecall_query_bundle",
        "concept": concept,
        "relevant_claims": claims,
        "source_role_summary": source_role_summary,
        "key_distinctions": contrasts,
    }
    put("groundrecall_query_bundle.json", json.dumps(bundle))

    page = {
        "concept": concept,
        "summary": {
            "supporting_observation_count": supporting_observation_count,
            "related_concept_count": related_concept_count,
        },
        "source_role_summary": source_role_summary,
        "distinctions": contrasts,
    }
    put("notebook_page.json", json.dumps(page))
def test_compare_notebook_packs_reports_deltas(tmp_path: Path) -> None:
    """The delta reports count differences and newly added source-role keys."""
    pack_specs = {
        "baseline": {
            "claim_count": 3,
            "source_role_summary": {"overview": 1},
            "distinction_count": 0,
            "supporting_observation_count": 3,
            "related_concept_count": 2,
        },
        "augmented": {
            "claim_count": 7,
            "source_role_summary": {"overview": 3, "nuance": 4},
            "distinction_count": 5,
            "supporting_observation_count": 7,
            "related_concept_count": 2,
        },
    }
    for dirname, spec in pack_specs.items():
        _write_pack(tmp_path / dirname, **spec)

    delta = compare_notebook_packs(tmp_path / "baseline", tmp_path / "augmented")["delta"]

    assert delta["claimCount"] == 4
    assert delta["distinctionCount"] == 5
    assert delta["sourceRoleKeys"] == ["nuance"]
def test_write_notebook_comparison_report_emits_files(tmp_path: Path) -> None:
    """Writing a report creates the JSON, Markdown and frontend pack files."""
    baseline_dir = tmp_path / "baseline"
    augmented_dir = tmp_path / "augmented"
    report_dir = tmp_path / "report"

    _write_pack(
        baseline_dir,
        claim_count=1,
        source_role_summary={"overview": 1},
        distinction_count=0,
        supporting_observation_count=1,
        related_concept_count=1,
    )
    _write_pack(
        augmented_dir,
        claim_count=2,
        source_role_summary={"overview": 1, "nuance": 1},
        distinction_count=1,
        supporting_observation_count=2,
        related_concept_count=1,
    )

    write_notebook_comparison_report(baseline_dir, augmented_dir, report_dir, title="Demo")

    for filename in (
        "comparison.json",
        "comparison.md",
        "frontend_pack_with_notebook.json",
    ):
        assert (report_dir / filename).exists()