From 8e4b4eb216271711ad40f5223c99fa8dfc921dbf Mon Sep 17 00:00:00 2001 From: Codex Date: Sat, 11 Apr 2026 06:45:21 -0400 Subject: [PATCH] Migrate Track 1 report layer into renunney --- Makefile | 19 ++- README.md | 4 +- docs/MIGRATION.md | 9 +- docs/WORKFLOW.md | 4 +- src/renunney/__init__.py | 20 +++ src/renunney/track1_api.py | 2 +- src/renunney/track1_report.py | 294 ++++++++++++++++++++++++++++++++++ tests/test_track1_report.py | 44 +++++ 8 files changed, 382 insertions(+), 14 deletions(-) create mode 100644 src/renunney/track1_report.py create mode 100644 tests/test_track1_report.py diff --git a/Makefile b/Makefile index 9a03c5d..0bba958 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,7 @@ TRACK1 := $(REPO_ROOT)/scripts/run_track1.py DB := $(REPO_ROOT)/runs/state/cos-orch.sqlite RESULT_ROOT := $(REPO_ROOT)/runs/results SCRATCH_ROOT := $(REPO_ROOT)/runs/scratch +MPLCONFIGDIR := $(SCRATCH_ROOT)/matplotlib FIG1_M005 := $(REPO_ROOT)/config/track1_figure1_paper_M_0_05.json FIG1_M025 := $(REPO_ROOT)/config/track1_figure1_paper_M_0_25.json @@ -38,7 +39,7 @@ help: @echo " results-tree List the current result files" init: - mkdir -p $(REPO_ROOT)/runs/state $(REPO_ROOT)/runs/results $(REPO_ROOT)/runs/scratch + mkdir -p $(REPO_ROOT)/runs/state $(REPO_ROOT)/runs/results $(REPO_ROOT)/runs/scratch $(MPLCONFIGDIR) $(PYTHON) $(ORCH) init-db --db $(DB) doctor: @@ -49,6 +50,7 @@ doctor: @echo "DB=$(DB)" @echo "RESULT_ROOT=$(RESULT_ROOT)" @echo "SCRATCH_ROOT=$(SCRATCH_ROOT)" + @echo "MPLCONFIGDIR=$(MPLCONFIGDIR)" test -f $(ORCH) test -f $(TRACK1) test -d $(LEGACY_ROOT)/python @@ -57,19 +59,24 @@ list-jobs: $(PYTHON) $(ORCH) list --db $(DB) track1-sim-smoke: - $(PYTHON) $(TRACK1) --mode simulate --K 5000 --N0 50 --n 1 --u 5e-6 --R 10 --T 40 --epochs 8 --seed 1 + mkdir -p $(MPLCONFIGDIR) + MPLCONFIGDIR=$(MPLCONFIGDIR) $(PYTHON) $(TRACK1) --mode simulate --K 5000 --N0 50 --n 1 --u 5e-6 --R 10 --T 40 --epochs 8 --seed 1 run-one: - $(PYTHON) $(ORCH) run-one --db $(DB) --result-root 
$(RESULT_ROOT) --scratch-root $(SCRATCH_ROOT) + mkdir -p $(MPLCONFIGDIR) + MPLCONFIGDIR=$(MPLCONFIGDIR) $(PYTHON) $(ORCH) run-one --db $(DB) --result-root $(RESULT_ROOT) --scratch-root $(SCRATCH_ROOT) run-loop: - $(PYTHON) $(ORCH) run-loop --db $(DB) --result-root $(RESULT_ROOT) --scratch-root $(SCRATCH_ROOT) + mkdir -p $(MPLCONFIGDIR) + MPLCONFIGDIR=$(MPLCONFIGDIR) $(PYTHON) $(ORCH) run-loop --db $(DB) --result-root $(RESULT_ROOT) --scratch-root $(SCRATCH_ROOT) run-loop-one: - $(PYTHON) $(ORCH) run-loop --db $(DB) --result-root $(RESULT_ROOT) --scratch-root $(SCRATCH_ROOT) --max-jobs 1 + mkdir -p $(MPLCONFIGDIR) + MPLCONFIGDIR=$(MPLCONFIGDIR) $(PYTHON) $(ORCH) run-loop --db $(DB) --result-root $(RESULT_ROOT) --scratch-root $(SCRATCH_ROOT) --max-jobs 1 collate-figure1: - $(PYTHON) $(ORCH) collate-figure1 --db $(DB) --output $(RESULT_ROOT)/figure1-collated.json + mkdir -p $(MPLCONFIGDIR) + MPLCONFIGDIR=$(MPLCONFIGDIR) $(PYTHON) $(ORCH) collate-figure1 --db $(DB) --output $(RESULT_ROOT)/figure1-collated.json submit-figure1-m005: $(PYTHON) $(ORCH) submit-figure1 --db $(DB) --config $(FIG1_M005) --job-prefix fig1-m005 --created-by make diff --git a/README.md b/README.md index b54f2df..ce1d3d6 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ plane and the Track 1 runner/API boundary are now local to `renunney`. - a local Track 1 analysis layer for tracking summaries and loci-regression, - a local Track 1 threshold/search layer for Nunney-style threshold checks, - a local Track 1 simulation kernel, +- a local Track 1 report generator, - a Makefile for common tasks, - migration notes for pulling code into this repo in stages. 
@@ -88,7 +89,8 @@ The current state is split: - Track 1 analysis layer: local to `renunney` - Track 1 threshold/search layer: local to `renunney` - Track 1 simulation kernel: local to `renunney` -- Track 1 report, dataset, fit, and extinction-model helpers: still imported +- Track 1 report generator: local to `renunney` +- Track 1 dataset, fit, and extinction-model helpers: still imported from the older `cost_of_substitution` directory through the local compatibility layer diff --git a/docs/MIGRATION.md b/docs/MIGRATION.md index 1bb068b..c94f5b0 100644 --- a/docs/MIGRATION.md +++ b/docs/MIGRATION.md @@ -33,13 +33,14 @@ Operational code still lives in: - `src/renunney/track1_threshold.py` 6. Track 1 simulation kernel has been migrated locally: - `src/renunney/track1_reference.py` -7. Migrate report, dataset, fit, and orchestration-adjacent Track 1 modules next: - - `python/track1_report.py` +7. Track 1 report layer has been migrated locally: + - `src/renunney/track1_report.py` +8. Migrate dataset, fit, and orchestration-adjacent Track 1 modules next: - `python/track1_dataset.py` - `python/track1_fit.py` - `python/track1_extinction.py` -8. Reduce or remove the remaining compatibility-layer imports after those modules are local. -9. Migrate docs and example configs last, after path references are updated. +9. Reduce or remove the remaining compatibility-layer imports after those modules are local. +10. Migrate docs and example configs last, after path references are updated. ## Constraint diff --git a/docs/WORKFLOW.md b/docs/WORKFLOW.md index 549251a..0a94c21 100644 --- a/docs/WORKFLOW.md +++ b/docs/WORKFLOW.md @@ -48,8 +48,8 @@ make status The Makefile now drives the local orchestration code in `renunney`, while the Track 1 runner/API boundary, analysis layer, threshold/search layer, and -simulation kernel are also local to `renunney`. 
The remaining Track 1 -report/dataset/fit helpers are still imported from the legacy +simulation kernel and report generator are also local to `renunney`. The +remaining Track 1 dataset/fit helpers are still imported from the legacy `cost_of_substitution` directory through the compatibility layer in `src/renunney/legacy.py`. The paper-scale Figure 1 configs used for submission are now local to `renunney/config`. diff --git a/src/renunney/__init__.py b/src/renunney/__init__.py index d851912..ff9ea93 100644 --- a/src/renunney/__init__.py +++ b/src/renunney/__init__.py @@ -48,6 +48,17 @@ from .track1_reference import ( simulate_run, summarize_generation, ) +from .track1_report import ( + SeriesCI, + aggregate_derived_series, + aggregate_series, + confidence_interval, + generate_report_bundle, + plot_mean_allele_vs_target, + plot_series, + plot_series_with_reference, + render_markdown_report, +) from .track1_threshold import ( ThresholdCheck, ThresholdSearchResult, @@ -81,6 +92,7 @@ __all__ = [ "repo_root", "run_one_job", "run_worker_loop", + "SeriesCI", "submit_job_manifest", "submit_track1_figure1_jobs", "ThresholdCheck", @@ -99,13 +111,18 @@ __all__ = [ "fit_linear_cost_by_loci", "generation_metrics", "genotype_fitness", + "generate_report_bundle", "initialize_population", "is_extinct", "load_config", "nunney_threshold_accepts", "paper_mutation_supply_M", "published_threshold_accepts", + "plot_mean_allele_vs_target", + "plot_series", + "plot_series_with_reference", "realize_birth_counts", + "render_markdown_report", "run_config", "simulate_one_generation", "simulate_run", @@ -115,4 +132,7 @@ __all__ = [ "summarize_tracking", "summarize_generation", "sweep_number_of_loci", + "aggregate_derived_series", + "aggregate_series", + "confidence_interval", ] diff --git a/src/renunney/track1_api.py b/src/renunney/track1_api.py index 829e22a..9055577 100644 --- a/src/renunney/track1_api.py +++ b/src/renunney/track1_api.py @@ -17,13 +17,13 @@ from typing import Any, Optional 
from .legacy import ensure_legacy_python_path from .track1_analysis import summarize_tracking, sweep_number_of_loci from .track1_reference import Track1Parameters, simulate_run +from .track1_report import generate_report_bundle from .track1_threshold import evaluate_threshold_candidate, search_threshold_over_candidates ensure_legacy_python_path() from track1_dataset import generate_extinction_dataset from track1_fit import class_balance, fit_payload_from_jsonl, load_jsonl -from track1_report import generate_report_bundle @dataclass(frozen=True, init=False) diff --git a/src/renunney/track1_report.py b/src/renunney/track1_report.py new file mode 100644 index 0000000..0a93f56 --- /dev/null +++ b/src/renunney/track1_report.py @@ -0,0 +1,294 @@ +""" +track1_report.py + +Local report generation for Track 1 simulation runs. +""" + +from __future__ import annotations + +from dataclasses import asdict, dataclass +import json +from pathlib import Path +from typing import Iterable + +import matplotlib + +matplotlib.use("Agg") +import matplotlib.pyplot as plt +import numpy as np + +from .track1_analysis import summarize_tracking +from .track1_reference import GenerationSummary, Track1Parameters, simulate_run + + +@dataclass(frozen=True) +class SeriesCI: + t: int + count: int + mean: float + ci_low: float + ci_high: float + + +def confidence_interval(values: Iterable[float]) -> tuple[int, float, float, float]: + arr = np.array(list(values), dtype=float) + count = int(arr.size) + if count == 0: + return 0, 0.0, 0.0, 0.0 + mean = float(np.mean(arr)) + if count == 1: + return count, mean, mean, mean + sem = float(np.std(arr, ddof=1) / np.sqrt(count)) + margin = 1.96 * sem + return count, mean, mean - margin, mean + margin + + +def aggregate_series(runs: list[list[GenerationSummary]], attr: str) -> list[SeriesCI]: + by_t: dict[int, list[float]] = {} + for run in runs: + for summary in run: + by_t.setdefault(int(summary.t), []).append(float(getattr(summary, attr))) + rows: 
list[SeriesCI] = [] + for t in sorted(by_t): + count, mean, low, high = confidence_interval(by_t[t]) + rows.append(SeriesCI(t=t, count=count, mean=mean, ci_low=low, ci_high=high)) + return rows + + +def aggregate_derived_series( + runs: list[list[GenerationSummary]], + fn, +) -> list[SeriesCI]: + by_t: dict[int, list[float]] = {} + for run in runs: + for summary in run: + by_t.setdefault(int(summary.t), []).append(float(fn(summary))) + rows: list[SeriesCI] = [] + for t in sorted(by_t): + count, mean, low, high = confidence_interval(by_t[t]) + rows.append(SeriesCI(t=t, count=count, mean=mean, ci_low=low, ci_high=high)) + return rows + + +def plot_series(series: list[SeriesCI], ylabel: str, title: str, outpath: Path) -> None: + x = np.array([row.t for row in series], dtype=float) + y = np.array([row.mean for row in series], dtype=float) + low = np.array([row.ci_low for row in series], dtype=float) + high = np.array([row.ci_high for row in series], dtype=float) + plt.figure(figsize=(9, 4.8)) + plt.plot(x, y, linewidth=2) + plt.fill_between(x, low, high, alpha=0.25) + plt.xlabel("Generation t") + plt.ylabel(ylabel) + plt.title(title) + plt.tight_layout() + plt.savefig(outpath, dpi=150) + plt.close() + + +def plot_series_with_reference( + series: list[SeriesCI], + ylabel: str, + title: str, + outpath: Path, + ref_value: float, + ref_label: str, +) -> None: + x = np.array([row.t for row in series], dtype=float) + y = np.array([row.mean for row in series], dtype=float) + low = np.array([row.ci_low for row in series], dtype=float) + high = np.array([row.ci_high for row in series], dtype=float) + plt.figure(figsize=(9, 4.8)) + plt.plot(x, y, linewidth=2, label="Observed mean") + plt.fill_between(x, low, high, alpha=0.25) + plt.axhline(ref_value, linestyle="--", linewidth=2, label=ref_label) + plt.xlabel("Generation t") + plt.ylabel(ylabel) + plt.title(title) + plt.legend() + plt.tight_layout() + plt.savefig(outpath, dpi=150) + plt.close() + + +def 
plot_mean_allele_vs_target( + allele_series: list[SeriesCI], + target_series: list[SeriesCI], + outpath: Path, +) -> None: + x = np.array([row.t for row in allele_series], dtype=float) + allele_mean = np.array([row.mean for row in allele_series], dtype=float) + allele_low = np.array([row.ci_low for row in allele_series], dtype=float) + allele_high = np.array([row.ci_high for row in allele_series], dtype=float) + target_mean = np.array([row.mean for row in target_series], dtype=float) + plt.figure(figsize=(9, 4.8)) + plt.plot(x, allele_mean, linewidth=2, label="Mean allele value") + plt.fill_between(x, allele_low, allele_high, alpha=0.25) + plt.plot(x, target_mean, linewidth=2, linestyle="--", label="Selective target t/T") + plt.xlabel("Generation t") + plt.ylabel("Allele-value scale") + plt.title("Mean Allele Value vs Selective Target") + plt.legend() + plt.tight_layout() + plt.savefig(outpath, dpi=150) + plt.close() + + +def render_markdown_report( + params: Track1Parameters, + runs: int, + seeds: list[int], + tracking_rows: list[dict], + figures: dict[str, str], +) -> str: + lines = [ + "# Track 1 Run Report", + "", + "## Parameters", + "", + ] + param_items = list(asdict(params).items()) + insertion_index = next((idx + 1 for idx, (key, _) in enumerate(param_items) if key == "u"), len(param_items)) + param_items.insert(insertion_index, ("M", params.M)) + for key, value in param_items: + lines.append(f"- `{key}`: `{value}`") + lines.extend( + [ + f"- `sim_runs`: `{runs}`", + f"- `seed_start`: `{seeds[0] if seeds else 0}`", + "", + "## Tracking Summary By Run", + "", + "| Run Seed | Extinction Occurred | First Extinction t | First Nonzero Allele t | Last Nonzero Allele t | Stayed Zero After Init | Final Mean Allele | Final Target | Final Gap | Mean \\|gap\\| | Max \\|gap\\| |",  # literal pipes inside a cell are escaped so the header keeps 11 cells, matching the delimiter row + "| --- | --- | ---: | ---: | ---: | --- | ---: | ---: | ---: | ---: | ---: |", + ] + ) + for row in tracking_rows: + lines.append( + f"| `{row['seed']}` | `{row['extinction_occurred']}` | 
`{row['first_extinction_t']}` | " + f"`{row['first_nonzero_allele_t']}` | `{row['last_nonzero_allele_t']}` | " + f"`{row['stayed_zero_after_initialization']}` | `{row['final_mean_allele_value']:.6f}` | " + f"`{row['final_target_value']:.6f}` | `{row['final_tracking_gap']:.6f}` | " + f"`{row['mean_abs_tracking_gap']:.6f}` | `{row['max_abs_tracking_gap']:.6f}` |" + ) + lines.extend(["", "## Figures", ""]) + for label, relpath in figures.items(): + lines.extend([f"### {label}", "", f"![{label}]({relpath})", ""]) + return "\n".join(lines) + "\n" + + +def generate_report_bundle( + params: Track1Parameters, + runs: int, + seed_start: int, + report_dir: str | Path, +) -> dict: + outdir = Path(report_dir) + outdir.mkdir(parents=True, exist_ok=True) + + run_summaries: list[list[GenerationSummary]] = [] + tracking_rows: list[dict] = [] + seeds = [seed_start + idx for idx in range(runs)] + for seed in seeds: + summaries = simulate_run(params, seed=seed) + run_summaries.append(summaries) + tracking = summarize_tracking(summaries) + tracking_rows.append({"seed": seed, **asdict(tracking)}) + + figure_specs = { + "Female Fecundity f": ("fecundity", "Female fecundity f", "figure_fecundity.png"), + "Mean Fitness w": ("mean_fitness", "Mean offspring survival w", "figure_fitness.png"), + "Expected Female Productivity f*w": ( + "mean_expected_female_productivity", + "Mean expected female productivity", + "figure_expected_productivity.png", + ), + "Birth Count": ("birth_count", "Birth count", "figure_birth_count.png"), + "Surviving Offspring Count": ( + "surviving_offspring_count", + "Surviving offspring count", + "figure_survivor_count.png", + ), + "Population Size N": ("N", "Population size N", "figure_population_size.png"), + "Tracking Gap": ("mean_tracking_gap", "Mean tracking gap", "figure_tracking_gap.png"), + "Selective Target": ("target_value", "Selective target t/T", "figure_target_value.png"), + } + derived_specs = { + "Survival Fraction": ( + lambda s: 0.0 if s.birth_count 
== 0 else s.surviving_offspring_count / s.birth_count, + "Survivors / births", + "figure_survival_fraction.png", + ), + "Fecundity Excess f - 2": ( + lambda s: s.fecundity - 2.0, + "Female fecundity excess over replacement", + "figure_fecundity_excess.png", + ), + } + + aggregate_payload: dict[str, list[dict]] = {} + figure_paths: dict[str, str] = {} + for label, (attr, ylabel, filename) in figure_specs.items(): + series = aggregate_series(run_summaries, attr) + aggregate_payload[attr] = [asdict(row) for row in series] + plot_series(series, ylabel=ylabel, title=label, outpath=outdir / filename) + figure_paths[label] = filename + for label, (fn, ylabel, filename) in derived_specs.items(): + series = aggregate_derived_series(run_summaries, fn) + aggregate_payload[filename.removesuffix(".png")] = [asdict(row) for row in series] + plot_series(series, ylabel=ylabel, title=label, outpath=outdir / filename) + figure_paths[label] = filename + + allele_series = aggregate_series(run_summaries, "mean_allele_value") + target_series = aggregate_series(run_summaries, "target_value") + aggregate_payload["mean_allele_overlay"] = { + "mean_allele_value": [asdict(row) for row in allele_series], + "target_value": [asdict(row) for row in target_series], + } + plot_mean_allele_vs_target( + allele_series=allele_series, + target_series=target_series, + outpath=outdir / "figure_mean_allele_vs_target.png", + ) + figure_paths["Mean Allele Value vs Selective Target"] = "figure_mean_allele_vs_target.png" + + realized_m_series = aggregate_series(run_summaries, "realized_mutation_count") + aggregate_payload["realized_M"] = [asdict(row) for row in realized_m_series] + plot_series_with_reference( + realized_m_series, + ylabel="Mutation count per generation", + title="Realized M vs Expected M", + outpath=outdir / "figure_realized_M.png", + ref_value=2.0 * params.K * params.u, + ref_label="Expected M = 2Ku", + ) + figure_paths["Realized M vs Expected M"] = "figure_realized_M.png" + + (outdir / 
"aggregate_series.json").write_text( + json.dumps(aggregate_payload, indent=2, sort_keys=True) + "\n", + encoding="utf-8", + ) + (outdir / "tracking_summary.json").write_text( + json.dumps(tracking_rows, indent=2, sort_keys=True) + "\n", + encoding="utf-8", + ) + report_text = render_markdown_report( + params=params, + runs=runs, + seeds=seeds, + tracking_rows=tracking_rows, + figures=figure_paths, + ) + report_path = outdir / "report.md" + report_path.write_text(report_text, encoding="utf-8") + return { + "mode": "report", + "parameters": asdict(params), + "derived_M": params.M, + "runs": runs, + "seed_start": seed_start, + "report_dir": str(outdir), + "report_path": str(report_path), + "figures": figure_paths, + "tracking_summary": tracking_rows, + } diff --git a/tests/test_track1_report.py b/tests/test_track1_report.py new file mode 100644 index 0000000..2b2973b --- /dev/null +++ b/tests/test_track1_report.py @@ -0,0 +1,44 @@ +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +SRC_DIR = ROOT / "src" +if str(SRC_DIR) not in sys.path: + sys.path.insert(0, str(SRC_DIR)) + +import renunney.track1_api as api + + +def test_run_config_report_mode_writes_report_bundle(tmp_path: Path): + report_dir = tmp_path / "report" + config = api.Track1RunConfig( + mode="report", + K=5000, + N0=20, + n=1, + u=5e-6, + R=10.0, + T=20, + runs=2, + seed=1, + report_dir=str(report_dir), + ) + payload = api.run_config(config) + assert payload["mode"] == "report" + report_path = Path(payload["report_path"]) + assert report_path.exists() + report_text = report_path.read_text(encoding="utf-8") + assert "- `K`: `5000`" in report_text + assert "- `u`: `5e-06`" in report_text + assert "- `M`: `0.05`" in report_text + assert "Extinction Occurred" in report_text + assert "First Extinction t" in report_text + assert (report_dir / "aggregate_series.json").exists() + assert (report_dir / "tracking_summary.json").exists() + assert (report_dir / 
"figure_fecundity.png").exists() + assert (report_dir / "figure_fitness.png").exists() + assert (report_dir / "figure_expected_productivity.png").exists() + assert (report_dir / "figure_realized_M.png").exists() + assert (report_dir / "figure_survival_fraction.png").exists() + assert (report_dir / "figure_fecundity_excess.png").exists() + assert (report_dir / "figure_mean_allele_vs_target.png").exists()