Add Notebook augmentation comparison utility
This commit is contained in:
parent
6e660187f6
commit
e3a04b6742
|
|
@ -0,0 +1,122 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from .pack_to_frontend import convert_pack
|
||||||
|
|
||||||
|
|
||||||
|
def _load_json(path: Path) -> dict[str, Any]:
    """Read and parse a UTF-8 JSON file, returning ``{}`` when it is absent.

    Args:
        path: Location of the JSON document.

    Returns:
        The decoded JSON payload, or an empty dict when ``path`` does not
        exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not hold valid JSON.
    """
    # EAFP: attempt the read directly instead of checking ``exists()`` first,
    # so a file that disappears between the check and the read still yields
    # ``{}`` rather than an unexpected exception. NotADirectoryError is caught
    # as well because ``exists()`` also reports False when a parent component
    # of the path is a regular file.
    try:
        text = path.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        return {}
    return json.loads(text)
|
||||||
|
|
||||||
|
|
||||||
|
def notebook_summary(pack_dir: str | Path) -> dict[str, Any]:
    """Return the ``"notebook"`` entry of the converted pack at *pack_dir*.

    The pack is passed through ``convert_pack``; when the resulting payload
    has no ``"notebook"`` key, or its value is falsy, an empty dict is
    returned instead.
    """
    notebook = convert_pack(pack_dir).get("notebook")
    return notebook if notebook else {}
|
||||||
|
|
||||||
|
|
||||||
|
def _count_delta(augmented: dict[str, Any], baseline: dict[str, Any], key: str) -> int:
    """Return ``augmented[key] - baseline[key]``, counting missing/null as 0."""
    return int(augmented.get(key) or 0) - int(baseline.get(key) or 0)


def compare_notebook_packs(
    baseline_pack_dir: str | Path,
    augmented_pack_dir: str | Path,
) -> dict[str, Any]:
    """Compare the notebook data of a baseline pack and an augmented pack.

    Both directories are run through ``convert_pack`` and the ``"notebook"``
    entry of each result is compared. The augmented pack's
    ``groundrecall_query_bundle.json`` and ``notebook_page.json`` are read
    (when present) to enrich the report.

    Args:
        baseline_pack_dir: Directory of the pack used as the reference point.
        augmented_pack_dir: Directory of the pack being evaluated.

    Returns:
        A dict with these keys:

        * ``baseline_notebook`` / ``augmented_notebook``: the notebook entries
          (``{}`` when absent or falsy).
        * ``delta``: augmented-minus-baseline differences for ``claimCount``,
          ``distinctionCount``, ``supportingObservationCount`` and
          ``relatedConceptCount``, plus ``sourceRoleKeys`` -- the sorted
          ``sourceRoleSummary`` keys present only in the augmented notebook.
        * ``bundle_excerpt``: claim ids, source role summary and at most the
          first eight key distinctions from the augmented bundle.
        * ``page_summary``: the ``summary`` entry of the augmented notebook
          page.
    """
    baseline_pack_dir = Path(baseline_pack_dir)
    augmented_pack_dir = Path(augmented_pack_dir)

    baseline_notebook = convert_pack(baseline_pack_dir).get("notebook") or {}
    augmented_notebook = convert_pack(augmented_pack_dir).get("notebook") or {}

    # Only the augmented pack's side files feed the report; the baseline
    # bundle was previously loaded but never used, so it is no longer read.
    augmented_bundle = _load_json(augmented_pack_dir / "groundrecall_query_bundle.json")
    augmented_page = _load_json(augmented_pack_dir / "notebook_page.json")

    count_keys = (
        "claimCount",
        "distinctionCount",
        "supportingObservationCount",
        "relatedConceptCount",
    )
    delta: dict[str, Any] = {
        key: _count_delta(augmented_notebook, baseline_notebook, key)
        for key in count_keys
    }

    baseline_roles = baseline_notebook.get("sourceRoleSummary") or {}
    augmented_roles = augmented_notebook.get("sourceRoleSummary") or {}
    delta["sourceRoleKeys"] = sorted(set(augmented_roles) - set(baseline_roles))

    # ``or []`` guards against an explicit null, matching the handling that
    # ``key_distinctions`` already had.
    relevant_claims = augmented_bundle.get("relevant_claims") or []
    key_distinctions = augmented_bundle.get("key_distinctions") or []

    return {
        "baseline_notebook": baseline_notebook,
        "augmented_notebook": augmented_notebook,
        "delta": delta,
        "bundle_excerpt": {
            "claim_ids": [item.get("claim_id") for item in relevant_claims],
            "source_role_summary": augmented_bundle.get("source_role_summary", {}),
            "key_distinctions": key_distinctions[:8],
        },
        "page_summary": augmented_page.get("summary", {}),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _notebook_section_lines(heading: str, notebook: dict[str, Any]) -> list[str]:
    """Render one ``## <heading>`` section of notebook stats plus a blank line."""
    return [
        f"## {heading}",
        f"- claimCount: {notebook.get('claimCount', 0)}",
        f"- sourceRoleSummary: {notebook.get('sourceRoleSummary', {})}",
        f"- distinctionCount: {notebook.get('distinctionCount', 0)}",
        f"- supportingObservationCount: {notebook.get('supportingObservationCount', 0)}",
        "",
    ]


def comparison_markdown(title: str, comparison: dict[str, Any]) -> str:
    """Render a comparison dict as a Markdown report.

    Args:
        title: Text for the top-level ``#`` heading.
        comparison: Mapping with optional ``baseline_notebook``,
            ``augmented_notebook`` and ``delta`` entries; missing or falsy
            entries are treated as empty dicts.

    Returns:
        Markdown text with ``Baseline``, ``Augmented`` and ``Delta`` sections,
        terminated by a single newline.
    """
    baseline = comparison.get("baseline_notebook") or {}
    augmented = comparison.get("augmented_notebook") or {}
    delta = comparison.get("delta") or {}

    lines = [f"# {title}", ""]
    lines += _notebook_section_lines("Baseline", baseline)
    lines += _notebook_section_lines("Augmented", augmented)

    lines.append("## Delta")
    for key in (
        "claimCount",
        "distinctionCount",
        "supportingObservationCount",
        "relatedConceptCount",
    ):
        # A null delta would make the ``+d`` format spec raise; show it as +0.
        change = delta.get(key) or 0
        lines.append(f"- {key}: {change:+d}")
    lines.append(f"- new source role keys: {delta.get('sourceRoleKeys', [])}")

    return "\n".join(lines) + "\n"
|
||||||
|
|
||||||
|
|
||||||
|
def write_notebook_comparison_report(
    baseline_pack_dir: str | Path,
    augmented_pack_dir: str | Path,
    outdir: str | Path,
    title: str = "Notebook Augmentation Comparison",
) -> dict[str, Any]:
    """Compare two packs and write the report files into *outdir*.

    *outdir* is created (including parents) when needed. Three UTF-8 files
    are written, in this order:

    * ``comparison.json`` -- the comparison dict, indented JSON.
    * ``comparison.md`` -- the Markdown rendering headed by *title*.
    * ``frontend_pack_with_notebook.json`` -- the ``convert_pack`` output for
      the augmented pack, indented JSON.

    Returns:
        The comparison dict produced by ``compare_notebook_packs``.
    """
    report_dir = Path(outdir)
    report_dir.mkdir(parents=True, exist_ok=True)

    comparison = compare_notebook_packs(baseline_pack_dir, augmented_pack_dir)

    json_report = report_dir / "comparison.json"
    json_report.write_text(json.dumps(comparison, indent=2), encoding="utf-8")

    markdown_report = report_dir / "comparison.md"
    markdown_report.write_text(comparison_markdown(title, comparison), encoding="utf-8")

    # The augmented pack is converted again here, after the two comparison
    # files are already on disk.
    frontend_dump = report_dir / "frontend_pack_with_notebook.json"
    frontend_payload = convert_pack(augmented_pack_dir)
    frontend_dump.write_text(json.dumps(frontend_payload, indent=2), encoding="utf-8")

    return comparison
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: write the comparison report and print it.

    Takes two positional arguments (baseline and augmented pack directories),
    a required ``--outdir`` and an optional ``--title``. The resulting
    comparison is printed to stdout as indented JSON.
    """
    cli = argparse.ArgumentParser(description="Compare baseline and augmented Notebook pack outputs.")
    for positional in ("baseline_pack_dir", "augmented_pack_dir"):
        cli.add_argument(positional)
    cli.add_argument("--outdir", required=True)
    cli.add_argument("--title", default="Notebook Augmentation Comparison")
    options = cli.parse_args()

    report = write_notebook_comparison_report(
        options.baseline_pack_dir,
        options.augmented_pack_dir,
        options.outdir,
        title=options.title,
    )
    print(json.dumps(report, indent=2))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run the CLI only when this module is executed as a script.
    main()
|
||||||
|
|
@ -0,0 +1,103 @@
|
||||||
|
from pathlib import Path
|
||||||
|
import json
|
||||||
|
|
||||||
|
from didactopus.notebook_augmentation_experiment import compare_notebook_packs, write_notebook_comparison_report
|
||||||
|
|
||||||
|
|
||||||
|
def _write_pack(
    root: Path,
    *,
    claim_count: int,
    source_role_summary: dict,
    distinction_count: int,
    supporting_observation_count: int,
    related_concept_count: int,
) -> None:
    """Write a minimal demo pack into *root* for the comparison tests.

    Creates *root* if needed and writes four UTF-8 files: ``pack.yaml``,
    ``concepts.yaml``, ``groundrecall_query_bundle.json`` and
    ``notebook_page.json``. The keyword arguments control how many claims and
    distinctions are generated and which summary values are stored.
    """
    root.mkdir(parents=True, exist_ok=True)

    def _emit(filename: str, text: str) -> None:
        (root / filename).write_text(text, encoding="utf-8")

    _emit("pack.yaml", "name: demo-pack\ndisplay_name: Demo Pack\ndescription: Demo\n")
    # NOTE(review): the indentation inside this literal looks collapsed
    # (possibly lost in transit) -- TODO confirm it still parses as YAML.
    _emit(
        "concepts.yaml",
        "concepts:\n - id: thermo\n title: Thermodynamics and Entropy\n prerequisites: []\n",
    )

    concept = {"concept_id": "concept::thermo", "title": "Thermodynamics and Entropy"}
    claims = [{"claim_id": f"c{number}"} for number in range(1, claim_count + 1)]
    contrasts = [{"distinction_type": "contrast"} for _ in range(distinction_count)]

    bundle = {
        "bundle_kind": "groundrecall_query_bundle",
        "concept": concept,
        "relevant_claims": claims,
        "source_role_summary": source_role_summary,
        "key_distinctions": contrasts,
    }
    _emit("groundrecall_query_bundle.json", json.dumps(bundle))

    page = {
        "concept": concept,
        "summary": {
            "supporting_observation_count": supporting_observation_count,
            "related_concept_count": related_concept_count,
        },
        "source_role_summary": source_role_summary,
        "distinctions": contrasts,
    }
    _emit("notebook_page.json", json.dumps(page))
|
||||||
|
|
||||||
|
|
||||||
|
def test_compare_notebook_packs_reports_deltas(tmp_path: Path) -> None:
    """The delta reports claim/distinction growth and newly added role keys."""
    baseline_dir = tmp_path / "baseline"
    augmented_dir = tmp_path / "augmented"

    baseline_spec = {
        "claim_count": 3,
        "source_role_summary": {"overview": 1},
        "distinction_count": 0,
        "supporting_observation_count": 3,
        "related_concept_count": 2,
    }
    augmented_spec = {
        "claim_count": 7,
        "source_role_summary": {"overview": 3, "nuance": 4},
        "distinction_count": 5,
        "supporting_observation_count": 7,
        "related_concept_count": 2,
    }
    _write_pack(baseline_dir, **baseline_spec)
    _write_pack(augmented_dir, **augmented_spec)

    delta = compare_notebook_packs(baseline_dir, augmented_dir)["delta"]

    assert delta["claimCount"] == 4
    assert delta["distinctionCount"] == 5
    assert delta["sourceRoleKeys"] == ["nuance"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_notebook_comparison_report_emits_files(tmp_path: Path) -> None:
    """Writing a report creates all three expected output files."""
    baseline_dir = tmp_path / "baseline"
    augmented_dir = tmp_path / "augmented"
    report_dir = tmp_path / "report"

    baseline_spec = {
        "claim_count": 1,
        "source_role_summary": {"overview": 1},
        "distinction_count": 0,
        "supporting_observation_count": 1,
        "related_concept_count": 1,
    }
    augmented_spec = {
        "claim_count": 2,
        "source_role_summary": {"overview": 1, "nuance": 1},
        "distinction_count": 1,
        "supporting_observation_count": 2,
        "related_concept_count": 1,
    }
    _write_pack(baseline_dir, **baseline_spec)
    _write_pack(augmented_dir, **augmented_spec)

    write_notebook_comparison_report(baseline_dir, augmented_dir, report_dir, title="Demo")

    for expected_name in (
        "comparison.json",
        "comparison.md",
        "frontend_pack_with_notebook.json",
    ):
        assert (report_dir / expected_name).exists()
|
||||||
Loading…
Reference in New Issue