Add Notebook augmentation comparison utility
This commit is contained in:
parent
6e660187f6
commit
e3a04b6742
|
|
@ -0,0 +1,122 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .pack_to_frontend import convert_pack
|
||||
|
||||
|
||||
def _load_json(path: Path) -> dict[str, Any]:
    """Parse the UTF-8 JSON document stored at *path*.

    A path that does not exist yields an empty dict, so optional pack
    artifacts can be treated as absent instead of as errors.  The parsed
    value is returned as-is; the ``dict`` annotation assumes the document's
    top level is a JSON object.
    """
    if not path.exists():
        return {}
    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)
|
||||
|
||||
|
||||
def notebook_summary(pack_dir: str | Path) -> dict[str, Any]:
    """Return the ``"notebook"`` section of the converted pack at *pack_dir*.

    The pack is passed through ``convert_pack``; when the resulting payload
    has no ``"notebook"`` entry, or that entry is falsy, an empty dict is
    returned instead.
    """
    frontend = convert_pack(pack_dir)
    notebook = frontend.get("notebook", {})
    if not notebook:
        return {}
    return notebook
|
||||
|
||||
|
||||
def _count_delta(baseline: dict[str, Any], augmented: dict[str, Any], key: str) -> int:
    """Return the augmented-minus-baseline difference of the count under *key*.

    A missing key, or a ``None``/empty value, counts as 0 on either side.
    """
    return int(augmented.get(key) or 0) - int(baseline.get(key) or 0)


def compare_notebook_packs(
    baseline_pack_dir: str | Path,
    augmented_pack_dir: str | Path,
) -> dict[str, Any]:
    """Compare the notebook output of a baseline pack and an augmented pack.

    Both packs are converted with ``convert_pack`` and their ``"notebook"``
    sections are compared.  The augmented pack's optional
    ``groundrecall_query_bundle.json`` and ``notebook_page.json`` files are
    read to add context; missing files are treated as empty.

    Args:
        baseline_pack_dir: Directory of the pack used as the reference.
        augmented_pack_dir: Directory of the pack whose notebook was augmented.

    Returns:
        A dict with these keys:

        * ``baseline_notebook`` / ``augmented_notebook`` -- the notebook
          sections of the two converted packs (``{}`` when absent).
        * ``delta`` -- augmented-minus-baseline differences for
          ``claimCount``, ``distinctionCount``,
          ``supportingObservationCount`` and ``relatedConceptCount``, plus
          ``sourceRoleKeys``: the sorted ``sourceRoleSummary`` keys present
          only in the augmented notebook.
        * ``bundle_excerpt`` -- claim ids, source role summary and at most
          the first eight key distinctions from the augmented query bundle.
        * ``page_summary`` -- the ``summary`` entry of the augmented
          notebook page.
    """
    baseline_pack_dir = Path(baseline_pack_dir)
    augmented_pack_dir = Path(augmented_pack_dir)

    baseline_notebook = convert_pack(baseline_pack_dir).get("notebook", {}) or {}
    augmented_notebook = convert_pack(augmented_pack_dir).get("notebook", {}) or {}

    # Only the augmented pack's side files feed the report; the baseline
    # bundle was never used, so it is no longer read.
    augmented_bundle = _load_json(augmented_pack_dir / "groundrecall_query_bundle.json")
    augmented_page = _load_json(augmented_pack_dir / "notebook_page.json")

    count_keys = (
        "claimCount",
        "distinctionCount",
        "supportingObservationCount",
        "relatedConceptCount",
    )
    delta: dict[str, Any] = {
        key: _count_delta(baseline_notebook, augmented_notebook, key)
        for key in count_keys
    }
    baseline_roles = baseline_notebook.get("sourceRoleSummary") or {}
    augmented_roles = augmented_notebook.get("sourceRoleSummary") or {}
    delta["sourceRoleKeys"] = sorted(set(augmented_roles) - set(baseline_roles))

    # ``or`` fallbacks keep explicit JSON nulls from breaking iteration or
    # slicing, matching how the notebook sections are normalised above.
    relevant_claims = augmented_bundle.get("relevant_claims") or []
    key_distinctions = augmented_bundle.get("key_distinctions") or []

    return {
        "baseline_notebook": baseline_notebook,
        "augmented_notebook": augmented_notebook,
        "delta": delta,
        "bundle_excerpt": {
            "claim_ids": [item.get("claim_id") for item in relevant_claims],
            "source_role_summary": augmented_bundle.get("source_role_summary") or {},
            # Keep the excerpt short: only the first eight distinctions.
            "key_distinctions": key_distinctions[:8],
        },
        "page_summary": augmented_page.get("summary") or {},
    }
|
||||
|
||||
|
||||
def comparison_markdown(title: str, comparison: dict[str, Any]) -> str:
    """Render *comparison* as a small Markdown report headed by *title*.

    The report has three sections: ``Baseline`` and ``Augmented`` list the
    claim count, source role summary, distinction count and supporting
    observation count of each notebook; ``Delta`` lists the signed count
    differences and the newly introduced source role keys.  Missing or
    falsy sections are rendered with zero/empty defaults.  The returned
    text ends with a newline.
    """
    # (field name, default) pairs shown for each notebook snapshot.
    snapshot_fields = (
        ("claimCount", 0),
        ("sourceRoleSummary", {}),
        ("distinctionCount", 0),
        ("supportingObservationCount", 0),
    )
    signed_fields = (
        "claimCount",
        "distinctionCount",
        "supportingObservationCount",
        "relatedConceptCount",
    )

    out = [f"# {title}", ""]
    for heading, section_key in (
        ("Baseline", "baseline_notebook"),
        ("Augmented", "augmented_notebook"),
    ):
        notebook = comparison.get(section_key, {}) or {}
        out.append(f"## {heading}")
        for name, fallback in snapshot_fields:
            out.append(f"- {name}: {notebook.get(name, fallback)}")
        out.append("")

    changes = comparison.get("delta", {}) or {}
    out.append("## Delta")
    for name in signed_fields:
        # "+d" always prints the sign, so increases read as "+4".
        out.append(f"- {name}: {changes.get(name, 0):+d}")
    out.append(f"- new source role keys: {changes.get('sourceRoleKeys', [])}")
    return "\n".join(out) + "\n"
|
||||
|
||||
|
||||
def write_notebook_comparison_report(
    baseline_pack_dir: str | Path,
    augmented_pack_dir: str | Path,
    outdir: str | Path,
    title: str = "Notebook Augmentation Comparison",
) -> dict[str, Any]:
    """Compare two packs and write the report files into *outdir*.

    *outdir* is created (including parents) when needed.  Three UTF-8 files
    are written, in this order:

    * ``comparison.json`` -- the comparison dict, indented JSON.
    * ``comparison.md`` -- the Markdown rendering headed by *title*.
    * ``frontend_pack_with_notebook.json`` -- the converted augmented pack.

    Returns:
        The comparison dict produced by ``compare_notebook_packs``.
    """
    report_dir = Path(outdir)
    report_dir.mkdir(parents=True, exist_ok=True)

    result = compare_notebook_packs(baseline_pack_dir, augmented_pack_dir)

    json_report = json.dumps(result, indent=2)
    (report_dir / "comparison.json").write_text(json_report, encoding="utf-8")

    markdown_report = comparison_markdown(title, result)
    (report_dir / "comparison.md").write_text(markdown_report, encoding="utf-8")

    frontend_pack = convert_pack(augmented_pack_dir)
    frontend_json = json.dumps(frontend_pack, indent=2)
    (report_dir / "frontend_pack_with_notebook.json").write_text(frontend_json, encoding="utf-8")

    return result
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: write the comparison report and print it.

    Positional arguments are the baseline and augmented pack directories;
    ``--outdir`` (required) is where the report files go and ``--title``
    overrides the Markdown heading.  The comparison is also printed to
    stdout as indented JSON.
    """
    cli = argparse.ArgumentParser(description="Compare baseline and augmented Notebook pack outputs.")
    for positional in ("baseline_pack_dir", "augmented_pack_dir"):
        cli.add_argument(positional)
    cli.add_argument("--outdir", required=True)
    cli.add_argument("--title", default="Notebook Augmentation Comparison")
    options = cli.parse_args()

    report = write_notebook_comparison_report(
        options.baseline_pack_dir,
        options.augmented_pack_dir,
        options.outdir,
        title=options.title,
    )
    print(json.dumps(report, indent=2))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the CLI only when executed as a script, not when imported.
    main()
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
from pathlib import Path
|
||||
import json
|
||||
|
||||
from didactopus.notebook_augmentation_experiment import compare_notebook_packs, write_notebook_comparison_report
|
||||
|
||||
|
||||
def _write_pack(
    root: Path,
    *,
    claim_count: int,
    source_role_summary: dict,
    distinction_count: int,
    supporting_observation_count: int,
    related_concept_count: int,
) -> None:
    """Write a minimal demo pack fixture into *root*.

    Creates *root* (including parents) and writes four UTF-8 files:
    ``pack.yaml``, ``concepts.yaml``, ``groundrecall_query_bundle.json`` and
    ``notebook_page.json``.

    Args:
        root: Directory that receives the fixture files.
        claim_count: Number of ``relevant_claims`` entries in the bundle;
            their ids are ``c1`` .. ``c<claim_count>``.
        source_role_summary: Mapping stored verbatim in both JSON files.
        distinction_count: Number of ``contrast`` distinction entries stored
            in both JSON files.
        supporting_observation_count: Value stored in the page summary.
        related_concept_count: Value stored in the page summary.
    """
    root.mkdir(parents=True, exist_ok=True)

    # Shared by the query bundle and the notebook page.
    concept = {"concept_id": "concept::thermo", "title": "Thermodynamics and Entropy"}
    distinctions = [{"distinction_type": "contrast"} for _ in range(distinction_count)]

    (root / "pack.yaml").write_text(
        "name: demo-pack\ndisplay_name: Demo Pack\ndescription: Demo\n",
        encoding="utf-8",
    )
    # The concept's keys must be indented under its list item; with every
    # line at the same column the document is not valid YAML.
    (root / "concepts.yaml").write_text(
        "concepts:\n"
        "  - id: thermo\n"
        "    title: Thermodynamics and Entropy\n"
        "    prerequisites: []\n",
        encoding="utf-8",
    )

    bundle = {
        "bundle_kind": "groundrecall_query_bundle",
        "concept": concept,
        "relevant_claims": [{"claim_id": f"c{number}"} for number in range(1, claim_count + 1)],
        "source_role_summary": source_role_summary,
        "key_distinctions": distinctions,
    }
    (root / "groundrecall_query_bundle.json").write_text(json.dumps(bundle), encoding="utf-8")

    page = {
        "concept": concept,
        "summary": {
            "supporting_observation_count": supporting_observation_count,
            "related_concept_count": related_concept_count,
        },
        "source_role_summary": source_role_summary,
        "distinctions": distinctions,
    }
    (root / "notebook_page.json").write_text(json.dumps(page), encoding="utf-8")
|
||||
|
||||
|
||||
def test_compare_notebook_packs_reports_deltas(tmp_path: Path) -> None:
    """The comparison reports count deltas and newly added source role keys."""
    baseline_dir = tmp_path / "baseline"
    augmented_dir = tmp_path / "augmented"

    baseline_spec = {
        "claim_count": 3,
        "source_role_summary": {"overview": 1},
        "distinction_count": 0,
        "supporting_observation_count": 3,
        "related_concept_count": 2,
    }
    augmented_spec = {
        "claim_count": 7,
        "source_role_summary": {"overview": 3, "nuance": 4},
        "distinction_count": 5,
        "supporting_observation_count": 7,
        "related_concept_count": 2,
    }
    _write_pack(baseline_dir, **baseline_spec)
    _write_pack(augmented_dir, **augmented_spec)

    delta = compare_notebook_packs(baseline_dir, augmented_dir)["delta"]

    assert delta["claimCount"] == 4
    assert delta["distinctionCount"] == 5
    assert delta["sourceRoleKeys"] == ["nuance"]
|
||||
|
||||
|
||||
def test_write_notebook_comparison_report_emits_files(tmp_path: Path) -> None:
    """Writing a report creates all three expected output files."""
    baseline_dir = tmp_path / "baseline"
    augmented_dir = tmp_path / "augmented"
    report_dir = tmp_path / "report"

    baseline_spec = {
        "claim_count": 1,
        "source_role_summary": {"overview": 1},
        "distinction_count": 0,
        "supporting_observation_count": 1,
        "related_concept_count": 1,
    }
    augmented_spec = {
        "claim_count": 2,
        "source_role_summary": {"overview": 1, "nuance": 1},
        "distinction_count": 1,
        "supporting_observation_count": 2,
        "related_concept_count": 1,
    }
    _write_pack(baseline_dir, **baseline_spec)
    _write_pack(augmented_dir, **augmented_spec)

    write_notebook_comparison_report(baseline_dir, augmented_dir, report_dir, title="Demo")

    for filename in ("comparison.json", "comparison.md", "frontend_pack_with_notebook.json"):
        assert (report_dir / filename).exists()
|
||||
Loading…
Reference in New Issue