Migrate Track 1 report layer into renunney

2026-04-11 06:45:21 -04:00 · 2026-04-11 06:45:21 -04:00 · 8e4b4eb216
parent 7c9fcd0dd4
commit 8e4b4eb216
8 changed files with 382 additions and 14 deletions
--- a/19
+++ b/19
@ -7,6 +7,7 @@ TRACK1 := $(REPO_ROOT)/scripts/run_track1.py
 DB := $(REPO_ROOT)/runs/state/cos-orch.sqlite
 RESULT_ROOT := $(REPO_ROOT)/runs/results
 SCRATCH_ROOT := $(REPO_ROOT)/runs/scratch
 MPLCONFIGDIR := $(SCRATCH_ROOT)/matplotlib
 FIG1_M005 := $(REPO_ROOT)/config/track1_figure1_paper_M_0_05.json
 FIG1_M025 := $(REPO_ROOT)/config/track1_figure1_paper_M_0_25.json
@ -38,7 +39,7 @@ help:
 	@echo "  results-tree        List the current result files"
 init:
-	mkdir -p $(REPO_ROOT)/runs/state $(REPO_ROOT)/runs/results $(REPO_ROOT)/runs/scratch
+	mkdir -p $(REPO_ROOT)/runs/state $(REPO_ROOT)/runs/results $(REPO_ROOT)/runs/scratch $(MPLCONFIGDIR)
 	$(PYTHON) $(ORCH) init-db --db $(DB)
 doctor:
@ -49,6 +50,7 @@ doctor:
 	@echo "DB=$(DB)"
 	@echo "RESULT_ROOT=$(RESULT_ROOT)"
 	@echo "SCRATCH_ROOT=$(SCRATCH_ROOT)"
 	@echo "MPLCONFIGDIR=$(MPLCONFIGDIR)"
 	test -f $(ORCH)
 	test -f $(TRACK1)
 	test -d $(LEGACY_ROOT)/python
@ -57,19 +59,24 @@ list-jobs:
 	$(PYTHON) $(ORCH) list --db $(DB)
 track1-sim-smoke:
-	$(PYTHON) $(TRACK1) --mode simulate --K 5000 --N0 50 --n 1 --u 5e-6 --R 10 --T 40 --epochs 8 --seed 1
+	mkdir -p $(MPLCONFIGDIR)
 	MPLCONFIGDIR=$(MPLCONFIGDIR) $(PYTHON) $(TRACK1) --mode simulate --K 5000 --N0 50 --n 1 --u 5e-6 --R 10 --T 40 --epochs 8 --seed 1
 run-one:
-	$(PYTHON) $(ORCH) run-one --db $(DB) --result-root $(RESULT_ROOT) --scratch-root $(SCRATCH_ROOT)
+	mkdir -p $(MPLCONFIGDIR)
 	MPLCONFIGDIR=$(MPLCONFIGDIR) $(PYTHON) $(ORCH) run-one --db $(DB) --result-root $(RESULT_ROOT) --scratch-root $(SCRATCH_ROOT)
 run-loop:
-	$(PYTHON) $(ORCH) run-loop --db $(DB) --result-root $(RESULT_ROOT) --scratch-root $(SCRATCH_ROOT)
+	mkdir -p $(MPLCONFIGDIR)
 	MPLCONFIGDIR=$(MPLCONFIGDIR) $(PYTHON) $(ORCH) run-loop --db $(DB) --result-root $(RESULT_ROOT) --scratch-root $(SCRATCH_ROOT)
 run-loop-one:
-	$(PYTHON) $(ORCH) run-loop --db $(DB) --result-root $(RESULT_ROOT) --scratch-root $(SCRATCH_ROOT) --max-jobs 1
+	mkdir -p $(MPLCONFIGDIR)
 	MPLCONFIGDIR=$(MPLCONFIGDIR) $(PYTHON) $(ORCH) run-loop --db $(DB) --result-root $(RESULT_ROOT) --scratch-root $(SCRATCH_ROOT) --max-jobs 1
 collate-figure1:
-	$(PYTHON) $(ORCH) collate-figure1 --db $(DB) --output $(RESULT_ROOT)/figure1-collated.json
+	mkdir -p $(MPLCONFIGDIR)
 	MPLCONFIGDIR=$(MPLCONFIGDIR) $(PYTHON) $(ORCH) collate-figure1 --db $(DB) --output $(RESULT_ROOT)/figure1-collated.json
 submit-figure1-m005:
 	$(PYTHON) $(ORCH) submit-figure1 --db $(DB) --config $(FIG1_M005) --job-prefix fig1-m005 --created-by make
--- a/README.md
+++ b/README.md
@ -25,6 +25,7 @@ plane and the Track 1 runner/API boundary are now local to `renunney`.
 - a local Track 1 analysis layer for tracking summaries and loci-regression,
 - a local Track 1 threshold/search layer for Nunney-style threshold checks,
 - a local Track 1 simulation kernel,
 - a local Track 1 report generator,
 - a Makefile for common tasks,
 - migration notes for pulling code into this repo in stages.
@ -88,7 +89,8 @@ The current state is split:
 - Track 1 analysis layer: local to `renunney`
 - Track 1 threshold/search layer: local to `renunney`
 - Track 1 simulation kernel: local to `renunney`
- Track 1 report, dataset, fit, and extinction-model helpers: still imported
+- Track 1 report generator: local to `renunney`
 - Track 1 dataset, fit, and extinction-model helpers: still imported
  from the older `cost_of_substitution` directory through the local
  compatibility layer
--- a/docs/MIGRATION.md
+++ b/docs/MIGRATION.md
@ -33,13 +33,14 @@ Operational code still lives in:
   - `src/renunney/track1_threshold.py`
 6. Track 1 simulation kernel has been migrated locally:
   - `src/renunney/track1_reference.py`
-7. Migrate report, dataset, fit, and orchestration-adjacent Track 1 modules next:
+7. Track 1 report layer has been migrated locally:
-   - `python/track1_report.py`
+   - `src/renunney/track1_report.py`
 8. Migrate dataset, fit, and orchestration-adjacent Track 1 modules next:
   - `python/track1_dataset.py`
   - `python/track1_fit.py`
   - `python/track1_extinction.py`
-8. Reduce or remove the remaining compatibility-layer imports after those modules are local.
+9. Reduce or remove the remaining compatibility-layer imports after those modules are local.
-9. Migrate docs and example configs last, after path references are updated.
+10. Migrate docs and example configs last, after path references are updated.
 ## Constraint
--- a/docs/WORKFLOW.md
+++ b/docs/WORKFLOW.md
@ -48,8 +48,8 @@ make status
 The Makefile now drives the local orchestration code in `renunney`, while the
 Track 1 runner/API boundary, analysis layer, threshold/search layer, and
-simulation kernel are also local to `renunney`. The remaining Track 1
+simulation kernel, as well as the report generator, are local to `renunney`. The
-report/dataset/fit helpers are still imported from the legacy
+remaining Track 1 dataset/fit helpers are still imported from the legacy
 `cost_of_substitution` directory through the compatibility layer in
 `src/renunney/legacy.py`. The paper-scale Figure 1 configs used for submission
 are now local to `renunney/config`.
--- a/src/renunney/init.py
+++ b/src/renunney/init.py
@ -48,6 +48,17 @@ from .track1_reference import (
    simulate_run,
    summarize_generation,
 )
 from .track1_report import (
    SeriesCI,
    aggregate_derived_series,
    aggregate_series,
    confidence_interval,
    generate_report_bundle,
    plot_mean_allele_vs_target,
    plot_series,
    plot_series_with_reference,
    render_markdown_report,
 )
 from .track1_threshold import (
    ThresholdCheck,
    ThresholdSearchResult,
@ -81,6 +92,7 @@ __all__ = [
    "repo_root",
    "run_one_job",
    "run_worker_loop",
    "SeriesCI",
    "submit_job_manifest",
    "submit_track1_figure1_jobs",
    "ThresholdCheck",
@ -99,13 +111,18 @@ __all__ = [
    "fit_linear_cost_by_loci",
    "generation_metrics",
    "genotype_fitness",
    "generate_report_bundle",
    "initialize_population",
    "is_extinct",
    "load_config",
    "nunney_threshold_accepts",
    "paper_mutation_supply_M",
    "published_threshold_accepts",
    "plot_mean_allele_vs_target",
    "plot_series",
    "plot_series_with_reference",
    "realize_birth_counts",
    "render_markdown_report",
    "run_config",
    "simulate_one_generation",
    "simulate_run",
@ -115,4 +132,7 @@ __all__ = [
    "summarize_tracking",
    "summarize_generation",
    "sweep_number_of_loci",
    "aggregate_derived_series",
    "aggregate_series",
    "confidence_interval",
 ]
--- a/src/renunney/track1_api.py
+++ b/src/renunney/track1_api.py
@ -17,13 +17,13 @@ from typing import Any, Optional
 from .legacy import ensure_legacy_python_path
 from .track1_analysis import summarize_tracking, sweep_number_of_loci
 from .track1_reference import Track1Parameters, simulate_run
 from .track1_report import generate_report_bundle
 from .track1_threshold import evaluate_threshold_candidate, search_threshold_over_candidates
 ensure_legacy_python_path()
 from track1_dataset import generate_extinction_dataset
 from track1_fit import class_balance, fit_payload_from_jsonl, load_jsonl
 from track1_report import generate_report_bundle
@dataclass(frozen=True, init=False)
--- a/src/renunney/track1_report.py
+++ b/src/renunney/track1_report.py
@ -0,0 +1,294 @@
 """
 track1_report.py
 Local report generation for Track 1 simulation runs.
 """
 from __future__ import annotations
 from dataclasses import asdict, dataclass
 import json
 from pathlib import Path
 from typing import Iterable
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 import numpy as np
 from .track1_analysis import summarize_tracking
 from .track1_reference import GenerationSummary, Track1Parameters, simulate_run
@dataclass(frozen=True)
 class SeriesCI:
    """One aggregated point of a per-generation series (immutable).

    Rows of this type are produced by ``aggregate_series`` and
    ``aggregate_derived_series`` from the tuple returned by
    ``confidence_interval``, and consumed by the plotting helpers.
    """
    # Generation index that the pooled observations share.
    t: int
    # Number of observations pooled at this generation.
    count: int
    # Mean of the pooled observations.
    mean: float
    # Lower bound of the interval reported by confidence_interval.
    ci_low: float
    # Upper bound of the interval reported by confidence_interval.
    ci_high: float
 def confidence_interval(values: Iterable[float]) -> tuple[int, float, float, float]:
    """Summarize ``values`` as ``(count, mean, ci_low, ci_high)``.

    The band is ``mean +/- 1.96 * SEM`` where the standard error uses the
    sample standard deviation (``ddof=1``). An empty input yields
    ``(0, 0.0, 0.0, 0.0)``; a single value yields a zero-width band at
    that value.
    """
    samples = np.array(list(values), dtype=float)
    n_samples = int(samples.size)
    if n_samples < 2:
        # No sample variance is available: collapse the band onto the
        # centre (0.0 when there is nothing to average).
        center = float(np.mean(samples)) if n_samples else 0.0
        return n_samples, center, center, center
    center = float(np.mean(samples))
    standard_error = float(np.std(samples, ddof=1) / np.sqrt(n_samples))
    half_width = 1.96 * standard_error
    return n_samples, center, center - half_width, center + half_width
 def aggregate_series(runs: list[list[GenerationSummary]], attr: str) -> list[SeriesCI]:
    """Pool attribute ``attr`` across all ``runs`` and summarize it per generation.

    Every summary contributes ``float(getattr(summary, attr))`` to the
    bucket keyed by ``int(summary.t)``. The result has one ``SeriesCI`` per
    generation seen, in ascending ``t`` order.
    """
    pooled: dict[int, list[float]] = {}
    for generations in runs:
        for snapshot in generations:
            generation = int(snapshot.t)
            if generation not in pooled:
                pooled[generation] = []
            pooled[generation].append(float(getattr(snapshot, attr)))
    result: list[SeriesCI] = []
    # Keys are unique, so sorting the items orders purely by generation.
    for generation, samples in sorted(pooled.items()):
        n_obs, center, lower, upper = confidence_interval(samples)
        result.append(
            SeriesCI(t=generation, count=n_obs, mean=center, ci_low=lower, ci_high=upper)
        )
    return result
 def aggregate_derived_series(
    runs: list[list[GenerationSummary]],
    fn,
 ) -> list[SeriesCI]:
    """Pool a computed quantity across all ``runs`` and summarize it per generation.

    ``fn`` is called once per summary and its result is converted with
    ``float``. Values are bucketed by ``int(summary.t)``, and one
    ``SeriesCI`` is returned per generation in ascending ``t`` order.
    """
    buckets: dict[int, list[float]] = {}
    for history in runs:
        for entry in history:
            step = int(entry.t)
            if step not in buckets:
                buckets[step] = []
            buckets[step].append(float(fn(entry)))
    ordered_steps = sorted(buckets)
    stats = [confidence_interval(buckets[step]) for step in ordered_steps]
    return [
        SeriesCI(t=step, count=n_obs, mean=center, ci_low=lower, ci_high=upper)
        for step, (n_obs, center, lower, upper) in zip(ordered_steps, stats)
    ]
 def plot_series(series: list[SeriesCI], ylabel: str, title: str, outpath: Path) -> None:
    """Plot the per-generation mean of ``series`` with its interval band and save it.

    Args:
        series: Aggregated rows; ``t`` supplies the x values, ``mean`` the
            line, and ``ci_low``/``ci_high`` the shaded band.
        ylabel: Label for the y axis.
        title: Figure title.
        outpath: Destination image path; saved at 150 dpi.
    """
    x = np.array([row.t for row in series], dtype=float)
    y = np.array([row.mean for row in series], dtype=float)
    low = np.array([row.ci_low for row in series], dtype=float)
    high = np.array([row.ci_high for row in series], dtype=float)
    fig, ax = plt.subplots(figsize=(9, 4.8))
    try:
        ax.plot(x, y, linewidth=2)
        ax.fill_between(x, low, high, alpha=0.25)
        ax.set_xlabel("Generation t")
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        fig.tight_layout()
        fig.savefig(outpath, dpi=150)
    finally:
        # Close this exact figure even when drawing or saving fails, so
        # repeated calls cannot pile up open figures in pyplot's registry.
        plt.close(fig)
 def plot_series_with_reference(
    series: list[SeriesCI],
    ylabel: str,
    title: str,
    outpath: Path,
    ref_value: float,
    ref_label: str,
 ) -> None:
    """Plot ``series`` with its interval band plus a horizontal reference line.

    Args:
        series: Aggregated rows; ``t`` supplies the x values, ``mean`` the
            line, and ``ci_low``/``ci_high`` the shaded band.
        ylabel: Label for the y axis.
        title: Figure title.
        outpath: Destination image path; saved at 150 dpi.
        ref_value: Y position of the dashed horizontal reference line.
        ref_label: Legend entry for the reference line.
    """
    x = np.array([row.t for row in series], dtype=float)
    y = np.array([row.mean for row in series], dtype=float)
    low = np.array([row.ci_low for row in series], dtype=float)
    high = np.array([row.ci_high for row in series], dtype=float)
    fig, ax = plt.subplots(figsize=(9, 4.8))
    try:
        ax.plot(x, y, linewidth=2, label="Observed mean")
        ax.fill_between(x, low, high, alpha=0.25)
        ax.axhline(ref_value, linestyle="--", linewidth=2, label=ref_label)
        ax.set_xlabel("Generation t")
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.legend()
        fig.tight_layout()
        fig.savefig(outpath, dpi=150)
    finally:
        # Close this exact figure even when drawing or saving fails, so
        # repeated calls cannot pile up open figures in pyplot's registry.
        plt.close(fig)
 def plot_mean_allele_vs_target(
    allele_series: list[SeriesCI],
    target_series: list[SeriesCI],
    outpath: Path,
 ) -> None:
    """Overlay the mean allele value (with band) and the selective target.

    Args:
        allele_series: Aggregated mean-allele rows; drawn as a solid line
            with a shaded ``ci_low``/``ci_high`` band.
        target_series: Aggregated target rows; only ``t`` and ``mean`` are
            used, drawn as a dashed line.
        outpath: Destination image path; saved at 150 dpi.
    """
    x = np.array([row.t for row in allele_series], dtype=float)
    allele_mean = np.array([row.mean for row in allele_series], dtype=float)
    allele_low = np.array([row.ci_low for row in allele_series], dtype=float)
    allele_high = np.array([row.ci_high for row in allele_series], dtype=float)
    # Plot the target against its own generations: identical to the allele
    # axis when both series cover the same generations, and still valid
    # (instead of a shape-mismatch error) when they do not.
    target_x = np.array([row.t for row in target_series], dtype=float)
    target_mean = np.array([row.mean for row in target_series], dtype=float)
    fig, ax = plt.subplots(figsize=(9, 4.8))
    try:
        ax.plot(x, allele_mean, linewidth=2, label="Mean allele value")
        ax.fill_between(x, allele_low, allele_high, alpha=0.25)
        ax.plot(target_x, target_mean, linewidth=2, linestyle="--", label="Selective target t/T")
        ax.set_xlabel("Generation t")
        ax.set_ylabel("Allele-value scale")
        ax.set_title("Mean Allele Value vs Selective Target")
        ax.legend()
        fig.tight_layout()
        fig.savefig(outpath, dpi=150)
    finally:
        # Close this exact figure even when drawing or saving fails, so
        # repeated calls cannot pile up open figures in pyplot's registry.
        plt.close(fig)
 def render_markdown_report(
    params: Track1Parameters,
    runs: int,
    seeds: list[int],
    tracking_rows: list[dict],
    figures: dict[str, str],
 ) -> str:
    """Render the Markdown text of a Track 1 run report.

    Args:
        params: Dataclass instance whose fields are listed under
            "Parameters"; its ``M`` attribute is inserted right after the
            ``u`` field (or at the end when there is no ``u`` field).
        runs: Number of simulated runs, reported as ``sim_runs``.
        seeds: Seeds used; the first is reported as ``seed_start`` (``0``
            when the list is empty).
        tracking_rows: One dict per run with the keys read below
            (``seed``, ``extinction_occurred``, ..., ``max_abs_tracking_gap``).
        figures: Mapping of figure heading to image path, emitted in
            iteration order.

    Returns:
        The full report as a newline-terminated string.
    """
    lines = [
        "# Track 1 Run Report",
        "",
        "## Parameters",
        "",
    ]
    param_items = list(asdict(params).items())
    insertion_index = next((idx + 1 for idx, (key, _) in enumerate(param_items) if key == "u"), len(param_items))
    param_items.insert(insertion_index, ("M", params.M))
    for key, value in param_items:
        lines.append(f"- `{key}`: `{value}`")
    lines.extend(
        [
            f"- `sim_runs`: `{runs}`",
            f"- `seed_start`: `{seeds[0] if seeds else 0}`",
            "",
            "## Tracking Summary By Run",
            "",
            # The literal pipes in "|gap|" are escaped: an unescaped pipe is a
            # cell separator, which would give the header more cells than the
            # delimiter row and stop the table from being recognised.
            "| Run Seed | Extinction Occurred | First Extinction t | First Nonzero Allele t | Last Nonzero Allele t | Stayed Zero After Init | Final Mean Allele | Final Target | Final Gap | Mean \\|gap\\| | Max \\|gap\\| |",
            "| --- | --- | ---: | ---: | ---: | --- | ---: | ---: | ---: | ---: | ---: |",
        ]
    )
    for row in tracking_rows:
        lines.append(
            f"| `{row['seed']}` | `{row['extinction_occurred']}` | `{row['first_extinction_t']}` | "
            f"`{row['first_nonzero_allele_t']}` | `{row['last_nonzero_allele_t']}` | "
            f"`{row['stayed_zero_after_initialization']}` | `{row['final_mean_allele_value']:.6f}` | "
            f"`{row['final_target_value']:.6f}` | `{row['final_tracking_gap']:.6f}` | "
            f"`{row['mean_abs_tracking_gap']:.6f}` | `{row['max_abs_tracking_gap']:.6f}` |"
        )
    lines.extend(["", "## Figures", ""])
    for label, relpath in figures.items():
        lines.extend([f"### {label}", "", f"![{label}]({relpath})", ""])
    return "\n".join(lines) + "\n"
 def generate_report_bundle(
    params: Track1Parameters,
    runs: int,
    seed_start: int,
    report_dir: str | Path,
 ) -> dict:
    """Simulate ``runs`` replicates and write a report bundle into ``report_dir``.

    Seeds are ``seed_start, seed_start + 1, ...``. The directory is created
    if needed and receives one PNG per figure, ``aggregate_series.json``,
    ``tracking_summary.json`` and ``report.md``.

    Returns:
        A dict with the run parameters, ``derived_M``, the report directory
        and report path (as strings), the figure-label-to-filename mapping,
        and the per-run tracking rows.
    """
    bundle_dir = Path(report_dir)
    bundle_dir.mkdir(parents=True, exist_ok=True)

    def as_rows(series: list[SeriesCI]) -> list[dict]:
        # JSON-friendly form of an aggregated series.
        return [asdict(point) for point in series]

    def as_json(document) -> str:
        return json.dumps(document, indent=2, sort_keys=True) + "\n"

    # Simulate every replicate, keeping both the raw history and its
    # per-run tracking summary.
    seeds = [seed_start + offset for offset in range(runs)]
    all_runs: list[list[GenerationSummary]] = []
    tracking_rows: list[dict] = []
    for seed in seeds:
        history = simulate_run(params, seed=seed)
        all_runs.append(history)
        tracking_row = {"seed": seed}
        tracking_row.update(asdict(summarize_tracking(history)))
        tracking_rows.append(tracking_row)

    # (figure label, summary attribute, y-axis label, output filename)
    attribute_figures = (
        ("Female Fecundity f", "fecundity", "Female fecundity f", "figure_fecundity.png"),
        ("Mean Fitness w", "mean_fitness", "Mean offspring survival w", "figure_fitness.png"),
        (
            "Expected Female Productivity f*w",
            "mean_expected_female_productivity",
            "Mean expected female productivity",
            "figure_expected_productivity.png",
        ),
        ("Birth Count", "birth_count", "Birth count", "figure_birth_count.png"),
        (
            "Surviving Offspring Count",
            "surviving_offspring_count",
            "Surviving offspring count",
            "figure_survivor_count.png",
        ),
        ("Population Size N", "N", "Population size N", "figure_population_size.png"),
        ("Tracking Gap", "mean_tracking_gap", "Mean tracking gap", "figure_tracking_gap.png"),
        ("Selective Target", "target_value", "Selective target t/T", "figure_target_value.png"),
    )
    # (figure label, per-summary function, y-axis label, output filename)
    derived_figures = (
        (
            "Survival Fraction",
            lambda gen: 0.0 if gen.birth_count == 0 else gen.surviving_offspring_count / gen.birth_count,
            "Survivors / births",
            "figure_survival_fraction.png",
        ),
        (
            "Fecundity Excess f - 2",
            lambda gen: gen.fecundity - 2.0,
            "Female fecundity excess over replacement",
            "figure_fecundity_excess.png",
        ),
    )

    payload: dict = {}
    figure_paths: dict[str, str] = {}
    for label, attr, ylabel, filename in attribute_figures:
        series = aggregate_series(all_runs, attr)
        payload[attr] = as_rows(series)
        plot_series(series, ylabel=ylabel, title=label, outpath=bundle_dir / filename)
        figure_paths[label] = filename
    for label, derive, ylabel, filename in derived_figures:
        series = aggregate_derived_series(all_runs, derive)
        # Derived series are keyed by their figure's file stem.
        payload[filename.removesuffix(".png")] = as_rows(series)
        plot_series(series, ylabel=ylabel, title=label, outpath=bundle_dir / filename)
        figure_paths[label] = filename

    # Overlay of mean allele value against the selective target.
    overlay_file = "figure_mean_allele_vs_target.png"
    allele_series = aggregate_series(all_runs, "mean_allele_value")
    target_series = aggregate_series(all_runs, "target_value")
    payload["mean_allele_overlay"] = {
        "mean_allele_value": as_rows(allele_series),
        "target_value": as_rows(target_series),
    }
    plot_mean_allele_vs_target(
        allele_series=allele_series,
        target_series=target_series,
        outpath=bundle_dir / overlay_file,
    )
    figure_paths["Mean Allele Value vs Selective Target"] = overlay_file

    # Realized mutation count against the expected supply 2Ku.
    realized_file = "figure_realized_M.png"
    realized_m_series = aggregate_series(all_runs, "realized_mutation_count")
    payload["realized_M"] = as_rows(realized_m_series)
    plot_series_with_reference(
        realized_m_series,
        ylabel="Mutation count per generation",
        title="Realized M vs Expected M",
        outpath=bundle_dir / realized_file,
        ref_value=2.0 * params.K * params.u,
        ref_label="Expected M = 2Ku",
    )
    figure_paths["Realized M vs Expected M"] = realized_file

    (bundle_dir / "aggregate_series.json").write_text(as_json(payload), encoding="utf-8")
    (bundle_dir / "tracking_summary.json").write_text(as_json(tracking_rows), encoding="utf-8")

    report_path = bundle_dir / "report.md"
    report_path.write_text(
        render_markdown_report(
            params=params,
            runs=runs,
            seeds=seeds,
            tracking_rows=tracking_rows,
            figures=figure_paths,
        ),
        encoding="utf-8",
    )
    return {
        "mode": "report",
        "parameters": asdict(params),
        "derived_M": params.M,
        "runs": runs,
        "seed_start": seed_start,
        "report_dir": str(bundle_dir),
        "report_path": str(report_path),
        "figures": figure_paths,
        "tracking_summary": tracking_rows,
    }
--- a/tests/test_track1_report.py
+++ b/tests/test_track1_report.py
@ -0,0 +1,44 @@
 import sys
 from pathlib import Path
 # Put this checkout's ``src`` directory at the front of ``sys.path`` (once)
 # so the ``renunney`` import below resolves against the local sources.
 ROOT = Path(__file__).resolve().parents[1]
 SRC_DIR = ROOT / "src"
 if str(SRC_DIR) not in sys.path:
    sys.path.insert(0, str(SRC_DIR))
 import renunney.track1_api as api
 def test_run_config_report_mode_writes_report_bundle(tmp_path: Path):
    """Report mode must write report.md, both JSON summaries and the figures."""
    report_dir = tmp_path / "report"
    settings = dict(
        mode="report",
        K=5000,
        N0=20,
        n=1,
        u=5e-6,
        R=10.0,
        T=20,
        runs=2,
        seed=1,
        report_dir=str(report_dir),
    )
    payload = api.run_config(api.Track1RunConfig(**settings))
    assert payload["mode"] == "report"
    report_path = Path(payload["report_path"])
    assert report_path.exists()
    report_text = report_path.read_text(encoding="utf-8")
    # Parameter bullets (including the derived M) and the table headings.
    expected_fragments = (
        "- `K`: `5000`",
        "- `u`: `5e-06`",
        "- `M`: `0.05`",
        "Extinction Occurred",
        "First Extinction t",
    )
    for fragment in expected_fragments:
        assert fragment in report_text
    # Data files and a representative set of figures next to the report.
    expected_artifacts = (
        "aggregate_series.json",
        "tracking_summary.json",
        "figure_fecundity.png",
        "figure_fitness.png",
        "figure_expected_productivity.png",
        "figure_realized_M.png",
        "figure_survival_fraction.png",
        "figure_fecundity_excess.png",
        "figure_mean_allele_vs_target.png",
    )
    for artifact in expected_artifacts:
        assert (report_dir / artifact).exists()