Migrate Track 1 analysis layer into renunney

2026-04-11 06:28:41 -04:00 · 2026-04-11 06:28:41 -04:00 · acbb90f452
parent a6d1326165
commit acbb90f452
8 changed files with 380 additions and 10 deletions
--- a/README.md
+++ b/README.md
@@ -22,6 +22,7 @@ plane and the Track 1 runner/API boundary are now local to `renunney`.
 - a local orchestration CLI and library,
 - local paper-scale Figure 1 submission configs,
 - a local Track 1 runner and config/API layer,
+- a local Track 1 analysis layer for tracking summaries and loci-regression,
 - a Makefile for common tasks,
 - migration notes for pulling code into this repo in stages.

@@ -82,6 +83,7 @@ The current state is split:

 - orchestration control plane: local to `renunney`
 - Track 1 runner and config/API layer: local to `renunney`
+- Track 1 analysis layer: local to `renunney`
 - Track 1 simulation backend: still in the older `cost_of_substitution`
  directory and imported through the local compatibility layer

--- a/docs/MIGRATION.md
+++ b/docs/MIGRATION.md
@@ -27,17 +27,19 @@ Operational code still lives in:
 3. Track 1 runner and API boundary have been migrated locally:
   - `scripts/run_track1.py`
   - `src/renunney/track1_api.py`
-4. Keep the Track 1 simulation backend in the legacy path until real multi-host runs are stable.
-5. Migrate the Track 1 simulation core after the runner path is stable:
+4. Track 1 analysis boundary has been migrated locally:
+   - `src/renunney/track1_analysis.py`
+5. Keep the Track 1 simulation backend in the legacy path until real multi-host runs are stable.
+6. Migrate the Track 1 simulation core after the runner path is stable:
   - `python/track1_reference.py`
   - `python/track1_threshold.py`
   - `python/track1_analysis.py`
-6. Migrate report, dataset, fit, and orchestration-adjacent Track 1 modules after the kernel boundary is stable:
+7. Migrate report, dataset, fit, and orchestration-adjacent Track 1 modules after the kernel boundary is stable:
   - `python/track1_report.py`
   - `python/track1_dataset.py`
   - `python/track1_fit.py`
   - `python/track1_extinction.py`
-7. Migrate docs and example configs last, after path references are updated.
+8. Migrate docs and example configs last, after path references are updated.

 ## Constraint

--- a/docs/WORKFLOW.md
+++ b/docs/WORKFLOW.md
@@ -47,7 +47,8 @@ make status
 ## Current Assumption

 The Makefile now drives the local orchestration code in `renunney`, while the
-Track 1 runner/API boundary is also local to `renunney`. The simulation kernel
-is still imported from the legacy `cost_of_substitution` directory through the
-compatibility layer in `src/renunney/legacy.py`. The paper-scale Figure 1
-configs used for submission are now local to `renunney/config`.
+Track 1 runner/API boundary and analysis layer are also local to `renunney`.
+The simulation kernel is still imported from the legacy `cost_of_substitution`
+directory through the compatibility layer in `src/renunney/legacy.py`. The
+paper-scale Figure 1 configs used for submission are now local to
+`renunney/config`.
--- a/src/renunney/__init__.py
+++ b/src/renunney/__init__.py
@@ -18,6 +18,15 @@ from .orchestration import (
    submit_job_manifest,
    submit_track1_figure1_jobs,
 )
+from .track1_analysis import (
+    LinearCostFit,
+    LocusThresholdRow,
+    NumberOfLociSweep,
+    TrackingSummary,
+    fit_linear_cost_by_loci,
+    summarize_tracking,
+    sweep_number_of_loci,
+)
 from .track1_api import Track1RunConfig, config_from_mapping, load_config, run_config, save_payload

 __all__ = [
@@ -35,14 +44,21 @@ __all__ = [
    "list_job_results",
    "list_jobs",
    "load_job_manifest",
+    "LinearCostFit",
+    "LocusThresholdRow",
+    "NumberOfLociSweep",
    "repo_root",
    "run_one_job",
    "run_worker_loop",
    "submit_job_manifest",
    "submit_track1_figure1_jobs",
+    "TrackingSummary",
    "Track1RunConfig",
    "config_from_mapping",
+    "fit_linear_cost_by_loci",
    "load_config",
    "run_config",
    "save_payload",
+    "summarize_tracking",
+    "sweep_number_of_loci",
 ]
--- a/src/renunney/track1_analysis.py
+++ b/src/renunney/track1_analysis.py
@@ -0,0 +1,170 @@
+"""
+track1_analysis.py
+
+Local Track 1 analysis helpers for renunney.
+
+This stage keeps the simulation kernel in the legacy tree while moving the
+analysis/reporting boundary inward.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Iterable, Optional
+
+import numpy as np
+
+from .legacy import ensure_legacy_python_path
+
+ensure_legacy_python_path()
+
+from track1_reference import GenerationSummary, Track1Parameters
+from track1_threshold import ThresholdSearchResult, search_threshold_over_candidates
+
+
+@dataclass(frozen=True)
+class LocusThresholdRow:
+    """Threshold result for one number-of-loci setting."""
+
+    n: int  # number-of-loci value this row describes
+    threshold_T: Optional[float]  # sweep_number_of_loci stores None when the search returned nothing
+    accepted: bool  # sweep_number_of_loci sets this True only when the search returned a result
+
+
+@dataclass(frozen=True)
+class LinearCostFit:
+    """Linear regression C = C0 + n*C1."""
+
+    intercept_c0: float  # fitted intercept C0
+    slope_c1: float  # fitted slope C1, i.e. change per unit of n
+    r_squared: float  # 1 - SS_res/SS_tot as computed by fit_linear_cost_by_loci
+    points_used: int  # number of rows that entered the regression
+
+
+@dataclass(frozen=True)
+class NumberOfLociSweep:
+    """Combined threshold rows and fitted regression."""
+
+    rows: list[LocusThresholdRow]  # one row per swept loci value, in sweep order
+    fit: Optional[LinearCostFit]  # None when fit_linear_cost_by_loci could not produce a fit
+
+
+@dataclass(frozen=True)
+class TrackingSummary:
+    """Condensed allele-tracking diagnostics for one simulation run."""
+
+    extinction_occurred: bool  # some generation summary was flagged extinct
+    first_extinction_t: Optional[int]  # t of the first extinct summary, else None
+    first_nonzero_allele_t: Optional[int]  # t of the first summary with |mean_allele_value| > zero_tol
+    last_nonzero_allele_t: Optional[int]  # t of the last such summary, else None
+    stayed_zero_after_initialization: bool  # every summary after the first stayed within zero_tol
+    max_abs_tracking_gap: float  # largest |mean_tracking_gap| over all summaries
+    final_tracking_gap: float  # signed mean_tracking_gap of the last summary
+    mean_abs_tracking_gap: float  # average |mean_tracking_gap| over all summaries
+    final_mean_allele_value: float  # mean_allele_value of the last summary
+    final_target_value: float  # target_value of the last summary
+
+
+def summarize_tracking(
+    summaries: Iterable[GenerationSummary],
+    zero_tol: float = 1.0e-15,
+) -> TrackingSummary:
+    """Condense per-generation summaries into a single TrackingSummary.
+
+    Allele values count as nonzero above ``zero_tol`` in magnitude; the first
+    summary is excluded from the stayed-zero check; empty input gives zeros.
+    """
+    history = list(summaries)
+    if not history:
+        return TrackingSummary(
+            extinction_occurred=False,
+            first_extinction_t=None,
+            first_nonzero_allele_t=None,
+            last_nonzero_allele_t=None,
+            stayed_zero_after_initialization=True,
+            max_abs_tracking_gap=0.0,
+            final_tracking_gap=0.0,
+            mean_abs_tracking_gap=0.0,
+            final_mean_allele_value=0.0,
+            final_target_value=0.0,
+        )
+
+    nonzero_t = [gen.t for gen in history if abs(gen.mean_allele_value) > zero_tol]
+    extinct_t = [gen.t for gen in history if gen.extinct]
+    gaps = np.abs(np.array([gen.mean_tracking_gap for gen in history], dtype=float))
+    last = history[-1]
+    return TrackingSummary(
+        extinction_occurred=bool(extinct_t),
+        first_extinction_t=int(extinct_t[0]) if extinct_t else None,
+        first_nonzero_allele_t=int(nonzero_t[0]) if nonzero_t else None,
+        last_nonzero_allele_t=int(nonzero_t[-1]) if nonzero_t else None,
+        stayed_zero_after_initialization=all(
+            abs(gen.mean_allele_value) <= zero_tol for gen in history[1:]
+        ),
+        max_abs_tracking_gap=float(gaps.max()),
+        final_tracking_gap=float(last.mean_tracking_gap),
+        mean_abs_tracking_gap=float(gaps.mean()),
+        final_mean_allele_value=float(last.mean_allele_value),
+        final_target_value=float(last.target_value),
+    )
+
+
+def fit_linear_cost_by_loci(rows: Iterable[LocusThresholdRow]) -> Optional[LinearCostFit]:
+    """Fit threshold_T = C0 + n*C1 by least squares; None unless two distinct n are usable."""
+    usable = [row for row in rows if row.accepted and row.threshold_T is not None]
+    x = np.array([row.n for row in usable], dtype=float)
+    # A line is undetermined (np.polyfit goes rank-deficient) when every n coincides.
+    if np.unique(x).size < 2:
+        return None
+    y = np.array([row.threshold_T for row in usable], dtype=float)
+    slope, intercept = np.polyfit(x, y, 1)
+    ss_res = float(np.sum((y - (intercept + slope * x)) ** 2))
+    ss_tot = float(np.sum((y - np.mean(y)) ** 2))
+    return LinearCostFit(
+        intercept_c0=float(intercept),
+        slope_c1=float(slope),
+        # Constant y has zero total variance; report a perfect fit instead of 0/0.
+        r_squared=1.0 if ss_tot == 0.0 else 1.0 - (ss_res / ss_tot),
+        points_used=len(usable),
+    )
+
+
+def sweep_number_of_loci(
+    params: Track1Parameters,
+    loci_values: Iterable[int],
+    candidate_T_values: Iterable[float],
+    runs: int = 20,
+    seed_start: int = 0,
+    cache_path: str | None = None,
+    jobs: int = 1,
+) -> NumberOfLociSweep:
+    """Run the threshold search once per loci value, then fit the resulting rows."""
+    candidates = list(candidate_T_values)
+
+    def threshold_row(position: int, loci: int) -> LocusThresholdRow:
+        # Rebuild the parameters from the listed fields, overriding only n.
+        found: Optional[ThresholdSearchResult] = search_threshold_over_candidates(
+            params=Track1Parameters(
+                K=params.K,
+                N0=params.N0,
+                n=loci,
+                u=params.u,
+                R=params.R,
+                T=params.T,
+                epochs=params.epochs,
+                p=params.p,
+                a_max=params.a_max,
+            ),
+            candidate_T_values=candidates,
+            runs=runs,
+            seed_start=seed_start + 100000 * position,
+            cache_path=cache_path,
+            jobs=jobs,
+        )
+        if found is None:
+            return LocusThresholdRow(n=loci, threshold_T=None, accepted=False)
+        return LocusThresholdRow(n=loci, threshold_T=float(found.threshold_T), accepted=True)
+
+    # Rows keep the order in which the loci values were supplied.
+    table = [threshold_row(position, loci) for position, loci in enumerate(loci_values)]
+    return NumberOfLociSweep(rows=table, fit=fit_linear_cost_by_loci(table))
--- a/src/renunney/track1_api.py
+++ b/src/renunney/track1_api.py
@@ -15,10 +15,10 @@ from pathlib import Path
 from typing import Any, Optional

 from .legacy import ensure_legacy_python_path
+from .track1_analysis import summarize_tracking, sweep_number_of_loci

 ensure_legacy_python_path()

-from track1_analysis import summarize_tracking, sweep_number_of_loci
 from track1_dataset import generate_extinction_dataset
 from track1_fit import class_balance, fit_payload_from_jsonl, load_jsonl
 from track1_reference import Track1Parameters, simulate_run
--- a/tests/test_track1_analysis.py
+++ b/tests/test_track1_analysis.py
@@ -0,0 +1,179 @@
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]
+SRC_DIR = ROOT / "src"
+if str(SRC_DIR) not in sys.path:
+    sys.path.insert(0, str(SRC_DIR))
+
+import renunney.track1_analysis as analysis
+import renunney.track1_api as api
+
+from track1_reference import GenerationSummary, Track1Parameters
+
+
+def test_fit_linear_cost_by_loci_recovers_line():
+    """Points lying exactly on T = 10 + 2n must yield intercept 10 and slope 2."""
+    rows = [
+        analysis.LocusThresholdRow(n=n, threshold_T=10.0 + 2.0 * n, accepted=True)
+        for n in (1, 2, 3)
+    ]
+    fit = analysis.fit_linear_cost_by_loci(rows)
+    assert fit is not None
+    assert abs(fit.intercept_c0 - 10.0) < 1e-9
+    assert abs(fit.slope_c1 - 2.0) < 1e-9
+
+
+def test_sweep_number_of_loci_uses_search_results(monkeypatch):
+    """The sweep must report one row per loci value, taken from the stubbed search."""
+    class Dummy:
+        def __init__(self, threshold_T):
+            self.threshold_T = threshold_T
+
+    def fake_search(params, candidate_T_values, runs=20, seed_start=0, cache_path=None, jobs=1):
+        return Dummy(threshold_T=10.0 + params.n)
+
+    monkeypatch.setattr(analysis, "search_threshold_over_candidates", fake_search)
+    base = Track1Parameters(K=5000, N0=20, n=1, u=5e-6, R=10.0, T=20)
+    sweep = analysis.sweep_number_of_loci(base, [1, 2, 3], [10, 20, 30], runs=2, seed_start=1, jobs=3)
+    assert [row.threshold_T for row in sweep.rows] == [11.0, 12.0, 13.0]
+    assert sweep.fit is not None
+
+
+def test_run_config_loci_regression_mode(monkeypatch):
+    """run_config in loci_regression mode must surface the stubbed sweep's fit."""
+
+    @dataclass(frozen=True)
+    class DummyFit:
+        intercept_c0: float = 5.0
+        slope_c1: float = 2.0
+        r_squared: float = 1.0
+        points_used: int = 3
+
+    class DummySweep:
+        rows = [
+            analysis.LocusThresholdRow(n=n, threshold_T=5.0 + 2.0 * n, accepted=True)
+            for n in (1, 2, 3)
+        ]
+        fit = DummyFit()
+
+    monkeypatch.setattr(api, "sweep_number_of_loci", lambda *args, **kwargs: DummySweep())
+    config = api.Track1RunConfig(
+        mode="loci_regression",
+        loci_values=[1, 2, 3],
+        t_start=10,
+        t_stop=30,
+        t_step=10,
+        runs=2,
+    )
+    payload = api.run_config(config)
+    assert payload["mode"] == "loci_regression"
+    assert payload["loci_values"] == [1, 2, 3]
+    assert payload["fit"]["slope_c1"] == 2.0
+
+
+def test_summarize_tracking_detects_post_initial_nonzero_alleles():
+    """A nonzero mean allele value after the first generation must be reported.
+
+    Only the second summary (t=-1) carries a nonzero mean allele value.
+    """
+    # Fields held constant across both generations.
+    shared = dict(
+        N=10,
+        female_fraction=0.5,
+        male_count=5,
+        female_count=5,
+        fecundity=1.0,
+        mean_fitness=1.0,
+        mean_expected_female_productivity=1.0,
+        paper_M=0.05,
+        expected_mutations_current_N=0.0001,
+        ne_approx=5.0,
+        extinct=False,
+    )
+    summaries = [
+        # First generation: mean allele value still zero.
+        GenerationSummary(
+            t=-2,
+            target_value=-0.1,
+            mean_allele_value=0.0,
+            mean_genotype_value=0.0,
+            mean_tracking_gap=0.1,
+            realized_mutation_count=0,
+            realized_mutation_rate_per_allele=0.0,
+            birth_count=0,
+            surviving_offspring_count=0,
+            **shared,
+        ),
+        # Second generation: mean allele value becomes nonzero.
+        GenerationSummary(
+            t=-1,
+            target_value=-0.05,
+            mean_allele_value=0.2,
+            mean_genotype_value=0.2,
+            mean_tracking_gap=0.25,
+            realized_mutation_count=1,
+            realized_mutation_rate_per_allele=0.05,
+            birth_count=2,
+            surviving_offspring_count=1,
+            **shared,
+        ),
+    ]
+    tracking = analysis.summarize_tracking(summaries)
+    assert tracking.extinction_occurred is False
+    assert tracking.first_extinction_t is None
+    assert tracking.first_nonzero_allele_t == -1
+    assert tracking.last_nonzero_allele_t == -1
+    assert tracking.stayed_zero_after_initialization is False
+
+
+def test_summarize_tracking_detects_extinction_time():
+    """The t of the first summary flagged extinct must be reported as the extinction time."""
+    shared = dict(
+        mean_allele_value=0.0,
+        mean_genotype_value=0.0,
+        paper_M=0.05,
+        realized_mutation_count=0,
+        realized_mutation_rate_per_allele=0.0,
+        birth_count=0,
+        surviving_offspring_count=0,
+    )
+    summaries = [
+        # Generation with a live population.
+        GenerationSummary(
+            t=0,
+            N=10,
+            female_fraction=0.5,
+            male_count=5,
+            female_count=5,
+            fecundity=1.0,
+            mean_fitness=1.0,
+            mean_expected_female_productivity=1.0,
+            target_value=0.0,
+            mean_tracking_gap=0.0,
+            expected_mutations_current_N=0.0001,
+            ne_approx=5.0,
+            extinct=False,
+            **shared,
+        ),
+        # Generation flagged extinct, with an empty population.
+        GenerationSummary(
+            t=1,
+            N=0,
+            female_fraction=0.0,
+            male_count=0,
+            female_count=0,
+            fecundity=0.0,
+            mean_fitness=0.0,
+            mean_expected_female_productivity=0.0,
+            target_value=0.1,
+            mean_tracking_gap=-0.1,
+            expected_mutations_current_N=0.0,
+            ne_approx=0.0,
+            extinct=True,
+            **shared,
+        ),
+    ]
+    tracking = analysis.summarize_tracking(summaries)
+    assert tracking.extinction_occurred is True
+    assert tracking.first_extinction_t == 1
--- a/tests/test_track1_api.py
+++ b/tests/test_track1_api.py
@@ -7,7 +7,7 @@ SRC_DIR = ROOT / "src"
 if str(SRC_DIR) not in sys.path:
    sys.path.insert(0, str(SRC_DIR))

-from renunney import track1_api as api
+import renunney.track1_api as api


 def test_run_config_simulate_mode_returns_contract():