From acbb90f45221e84c0aae59fa30d9e828d1c74487 Mon Sep 17 00:00:00 2001 From: Codex Date: Sat, 11 Apr 2026 06:28:41 -0400 Subject: [PATCH] Migrate Track 1 analysis layer into renunney --- README.md | 2 + docs/MIGRATION.md | 10 +- docs/WORKFLOW.md | 9 +- src/renunney/__init__.py | 16 +++ src/renunney/track1_analysis.py | 170 ++++++++++++++++++++++++++++++ src/renunney/track1_api.py | 2 +- tests/test_track1_analysis.py | 179 ++++++++++++++++++++++++++++++++ tests/test_track1_api.py | 2 +- 8 files changed, 380 insertions(+), 10 deletions(-) create mode 100644 src/renunney/track1_analysis.py create mode 100644 tests/test_track1_analysis.py diff --git a/README.md b/README.md index 15fbadb..ccef234 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ plane and the Track 1 runner/API boundary are now local to `renunney`. - a local orchestration CLI and library, - local paper-scale Figure 1 submission configs, - a local Track 1 runner and config/API layer, +- a local Track 1 analysis layer for tracking summaries and loci-regression, - a Makefile for common tasks, - migration notes for pulling code into this repo in stages. @@ -82,6 +83,7 @@ The current state is split: - orchestration control plane: local to `renunney` - Track 1 runner and config/API layer: local to `renunney` +- Track 1 analysis layer: local to `renunney` - Track 1 simulation backend: still in the older `cost_of_substitution` directory and imported through the local compatibility layer diff --git a/docs/MIGRATION.md b/docs/MIGRATION.md index 070f008..e245614 100644 --- a/docs/MIGRATION.md +++ b/docs/MIGRATION.md @@ -27,17 +27,19 @@ Operational code still lives in: 3. Track 1 runner and API boundary have been migrated locally: - `scripts/run_track1.py` - `src/renunney/track1_api.py` -4. Keep the Track 1 simulation backend in the legacy path until real multi-host runs are stable. -5. Migrate the Track 1 simulation core after the runner path is stable: +4. 
Track 1 analysis boundary has been migrated locally: + - `src/renunney/track1_analysis.py` +5. Keep the Track 1 simulation backend in the legacy path until real multi-host runs are stable. +6. Migrate the Track 1 simulation core after the runner path is stable: - `python/track1_reference.py` - `python/track1_threshold.py` - `python/track1_analysis.py` -6. Migrate report, dataset, fit, and orchestration-adjacent Track 1 modules after the kernel boundary is stable: +7. Migrate report, dataset, fit, and orchestration-adjacent Track 1 modules after the kernel boundary is stable: - `python/track1_report.py` - `python/track1_dataset.py` - `python/track1_fit.py` - `python/track1_extinction.py` -7. Migrate docs and example configs last, after path references are updated. +8. Migrate docs and example configs last, after path references are updated. ## Constraint diff --git a/docs/WORKFLOW.md b/docs/WORKFLOW.md index 3721502..300aea4 100644 --- a/docs/WORKFLOW.md +++ b/docs/WORKFLOW.md @@ -47,7 +47,8 @@ make status ## Current Assumption The Makefile now drives the local orchestration code in `renunney`, while the -Track 1 runner/API boundary is also local to `renunney`. The simulation kernel -is still imported from the legacy `cost_of_substitution` directory through the -compatibility layer in `src/renunney/legacy.py`. The paper-scale Figure 1 -configs used for submission are now local to `renunney/config`. +Track 1 runner/API boundary and analysis layer are also local to `renunney`. +The simulation kernel is still imported from the legacy `cost_of_substitution` +directory through the compatibility layer in `src/renunney/legacy.py`. The +paper-scale Figure 1 configs used for submission are now local to +`renunney/config`. 
diff --git a/src/renunney/__init__.py b/src/renunney/__init__.py
index 9f37925..6aed776 100644
--- a/src/renunney/__init__.py
+++ b/src/renunney/__init__.py
@@ -18,6 +18,15 @@ from .orchestration import (
     submit_job_manifest,
     submit_track1_figure1_jobs,
 )
+from .track1_analysis import (
+    LinearCostFit,
+    LocusThresholdRow,
+    NumberOfLociSweep,
+    TrackingSummary,
+    fit_linear_cost_by_loci,
+    summarize_tracking,
+    sweep_number_of_loci,
+)
 from .track1_api import Track1RunConfig, config_from_mapping, load_config, run_config, save_payload
 
 __all__ = [
@@ -35,14 +44,21 @@ __all__ = [
     "list_job_results",
     "list_jobs",
     "load_job_manifest",
+    "LinearCostFit",
+    "LocusThresholdRow",
+    "NumberOfLociSweep",
     "repo_root",
     "run_one_job",
     "run_worker_loop",
     "submit_job_manifest",
     "submit_track1_figure1_jobs",
+    "TrackingSummary",
     "Track1RunConfig",
     "config_from_mapping",
+    "fit_linear_cost_by_loci",
     "load_config",
     "run_config",
     "save_payload",
+    "summarize_tracking",
+    "sweep_number_of_loci",
 ]
diff --git a/src/renunney/track1_analysis.py b/src/renunney/track1_analysis.py
new file mode 100644
index 0000000..6101f21
--- /dev/null
+++ b/src/renunney/track1_analysis.py
@@ -0,0 +1,214 @@
+"""
+track1_analysis.py
+
+Local Track 1 analysis helpers for renunney.
+
+This stage keeps the simulation kernel in the legacy tree while moving the
+analysis/reporting boundary inward.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Iterable, Optional
+
+import numpy as np
+
+from .legacy import ensure_legacy_python_path
+
+# Must run before the legacy-tree imports below so that they can be resolved.
+ensure_legacy_python_path()
+
+from track1_reference import GenerationSummary, Track1Parameters
+from track1_threshold import ThresholdSearchResult, search_threshold_over_candidates
+
+
+@dataclass(frozen=True)
+class LocusThresholdRow:
+    """Threshold result for one number-of-loci setting."""
+
+    n: int
+    threshold_T: Optional[float]
+    accepted: bool
+
+
+@dataclass(frozen=True)
+class LinearCostFit:
+    """Linear regression C = C0 + n*C1."""
+
+    intercept_c0: float
+    slope_c1: float
+    r_squared: float
+    points_used: int
+
+
+@dataclass(frozen=True)
+class NumberOfLociSweep:
+    """Combined threshold rows and fitted regression."""
+
+    rows: list[LocusThresholdRow]
+    fit: Optional[LinearCostFit]
+
+
+@dataclass(frozen=True)
+class TrackingSummary:
+    """Condensed allele-tracking diagnostics for one simulation run."""
+
+    extinction_occurred: bool
+    first_extinction_t: Optional[int]
+    first_nonzero_allele_t: Optional[int]
+    last_nonzero_allele_t: Optional[int]
+    stayed_zero_after_initialization: bool
+    max_abs_tracking_gap: float
+    final_tracking_gap: float
+    mean_abs_tracking_gap: float
+    final_mean_allele_value: float
+    final_target_value: float
+
+
+def summarize_tracking(
+    summaries: Iterable[GenerationSummary],
+    zero_tol: float = 1.0e-15,
+) -> TrackingSummary:
+    """Condense per-generation summaries into one TrackingSummary.
+
+    Args:
+        summaries: Generation records in scan order. Only ``t``, ``extinct``,
+            ``mean_allele_value``, ``mean_tracking_gap`` and ``target_value``
+            are read.
+        zero_tol: A mean allele value counts as zero when its absolute value
+            does not exceed this tolerance.
+
+    Returns:
+        The condensed diagnostics. Empty input yields a no-extinction summary
+        with every numeric field 0.0 and every time field None.
+    """
+    summary_list = list(summaries)
+    if not summary_list:
+        return TrackingSummary(
+            extinction_occurred=False,
+            first_extinction_t=None,
+            first_nonzero_allele_t=None,
+            last_nonzero_allele_t=None,
+            stayed_zero_after_initialization=True,
+            max_abs_tracking_gap=0.0,
+            final_tracking_gap=0.0,
+            mean_abs_tracking_gap=0.0,
+            final_mean_allele_value=0.0,
+            final_target_value=0.0,
+        )
+
+    nonzero = [summary for summary in summary_list if abs(summary.mean_allele_value) > zero_tol]
+    extinct_rows = [summary for summary in summary_list if summary.extinct]
+    first_nonzero_t = None if not nonzero else int(nonzero[0].t)
+    last_nonzero_t = None if not nonzero else int(nonzero[-1].t)
+    first_extinction_t = None if not extinct_rows else int(extinct_rows[0].t)
+    # The first record is treated as the initial state and is exempt.
+    post_init = summary_list[1:]
+    stayed_zero_after_initialization = all(
+        abs(summary.mean_allele_value) <= zero_tol for summary in post_init
+    )
+    abs_gaps = np.array([abs(summary.mean_tracking_gap) for summary in summary_list], dtype=float)
+    final = summary_list[-1]
+    return TrackingSummary(
+        extinction_occurred=bool(extinct_rows),
+        first_extinction_t=first_extinction_t,
+        first_nonzero_allele_t=first_nonzero_t,
+        last_nonzero_allele_t=last_nonzero_t,
+        stayed_zero_after_initialization=stayed_zero_after_initialization,
+        max_abs_tracking_gap=float(np.max(abs_gaps)),
+        final_tracking_gap=float(final.mean_tracking_gap),
+        mean_abs_tracking_gap=float(np.mean(abs_gaps)),
+        final_mean_allele_value=float(final.mean_allele_value),
+        final_target_value=float(final.target_value),
+    )
+
+
+def fit_linear_cost_by_loci(rows: Iterable[LocusThresholdRow]) -> Optional[LinearCostFit]:
+    """Fit threshold_T = C0 + n*C1 by least squares over the accepted rows.
+
+    Rows that were not accepted, or that carry no threshold, are ignored.
+
+    Returns:
+        The fitted line, or None when the usable rows cover fewer than two
+        distinct values of ``n`` (the slope is then undetermined).
+    """
+    usable = [row for row in rows if row.accepted and row.threshold_T is not None]
+    # A line needs two distinct abscissae; otherwise np.polyfit is
+    # rank-deficient and its coefficients are not meaningful.
+    if len({row.n for row in usable}) < 2:
+        return None
+
+    x = np.array([row.n for row in usable], dtype=float)
+    y = np.array([row.threshold_T for row in usable], dtype=float)
+    slope, intercept = np.polyfit(x, y, 1)
+    yhat = intercept + slope * x
+    ss_res = float(np.sum((y - yhat) ** 2))
+    ss_tot = float(np.sum((y - np.mean(y)) ** 2))
+    # A constant response is fitted exactly, so report a perfect score.
+    r_squared = 1.0 if ss_tot == 0.0 else 1.0 - (ss_res / ss_tot)
+    return LinearCostFit(
+        intercept_c0=float(intercept),
+        slope_c1=float(slope),
+        r_squared=float(r_squared),
+        points_used=len(usable),
+    )
+
+
+def sweep_number_of_loci(
+    params: Track1Parameters,
+    loci_values: Iterable[int],
+    candidate_T_values: Iterable[float],
+    runs: int = 20,
+    seed_start: int = 0,
+    cache_path: str | None = None,
+    jobs: int = 1,
+) -> NumberOfLociSweep:
+    """Run a threshold search per number-of-loci value and fit the trend.
+
+    Args:
+        params: Template parameters. K, N0, u, R, T, epochs, p and a_max are
+            copied into each run; ``n`` is replaced by the loci value.
+        loci_values: Numbers of loci to evaluate, in order.
+        candidate_T_values: Candidate T values handed to every search.
+        runs: Forwarded to ``search_threshold_over_candidates``.
+        seed_start: Seed base; the i-th loci value (0-based) is searched with
+            ``seed_start + i * 100000``.
+        cache_path: Forwarded to ``search_threshold_over_candidates``.
+        jobs: Forwarded to ``search_threshold_over_candidates``.
+
+    Returns:
+        One row per loci value (``accepted`` is False when the search returned
+        None) plus the linear fit over the accepted rows.
+    """
+    rows: list[LocusThresholdRow] = []
+    # Materialize once: the candidates are reused for every loci value.
+    candidate_list = list(candidate_T_values)
+    for index, n_value in enumerate(loci_values):
+        run_params = Track1Parameters(
+            K=params.K,
+            N0=params.N0,
+            n=n_value,
+            u=params.u,
+            R=params.R,
+            T=params.T,
+            epochs=params.epochs,
+            p=params.p,
+            a_max=params.a_max,
+        )
+        result: Optional[ThresholdSearchResult] = search_threshold_over_candidates(
+            params=run_params,
+            candidate_T_values=candidate_list,
+            runs=runs,
+            seed_start=seed_start + (index * 100000),
+            cache_path=cache_path,
+            jobs=jobs,
+        )
+        rows.append(
+            LocusThresholdRow(
+                n=n_value,
+                threshold_T=None if result is None else float(result.threshold_T),
+                accepted=result is not None,
+            )
+        )
+    return NumberOfLociSweep(rows=rows, fit=fit_linear_cost_by_loci(rows))
diff --git a/src/renunney/track1_api.py b/src/renunney/track1_api.py
index 257a240..dce9a87 100644
--- a/src/renunney/track1_api.py
+++ b/src/renunney/track1_api.py
@@ -15,10 +15,10 @@ from pathlib import Path
 from typing import Any, Optional
 
 from .legacy import ensure_legacy_python_path
+from .track1_analysis import summarize_tracking, sweep_number_of_loci
 
 ensure_legacy_python_path()
 
-from track1_analysis import summarize_tracking, sweep_number_of_loci
 from track1_dataset import generate_extinction_dataset
 from track1_fit import class_balance, fit_payload_from_jsonl, load_jsonl
 from track1_reference import Track1Parameters, simulate_run
diff --git a/tests/test_track1_analysis.py b/tests/test_track1_analysis.py
new file mode 100644
index 0000000..e5c6ba5
--- /dev/null
+++ b/tests/test_track1_analysis.py
@@ -0,0 +1,187 @@
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]
+SRC_DIR = ROOT / "src"
+if str(SRC_DIR) not in sys.path:
+    sys.path.insert(0, str(SRC_DIR))
+
+import renunney.track1_analysis as analysis
+import renunney.track1_api as api
+
+from track1_reference import GenerationSummary, Track1Parameters
+
+
+def test_fit_linear_cost_by_loci_recovers_line():
+    rows = [
+        analysis.LocusThresholdRow(n=1, threshold_T=12.0, accepted=True),
+        analysis.LocusThresholdRow(n=2, threshold_T=14.0, accepted=True),
+        analysis.LocusThresholdRow(n=3, threshold_T=16.0, accepted=True),
+    ]
+    fit = analysis.fit_linear_cost_by_loci(rows)
+    assert fit is not None
+    assert abs(fit.intercept_c0 - 10.0) < 1e-9
+    assert abs(fit.slope_c1 - 2.0) < 1e-9
+
+
+def test_fit_linear_cost_by_loci_needs_two_distinct_loci():
+    rows = [
+        analysis.LocusThresholdRow(n=2, threshold_T=12.0, accepted=True),
+        analysis.LocusThresholdRow(n=2, threshold_T=14.0, accepted=True),
+    ]
+    assert analysis.fit_linear_cost_by_loci(rows) is None
+
+
+def test_sweep_number_of_loci_uses_search_results(monkeypatch):
+    params = Track1Parameters(K=5000, N0=20, n=1, u=5e-6, R=10.0, T=20)
+
+    class Dummy:
+        def __init__(self, threshold_T):
+            self.threshold_T = threshold_T
+
+    def fake_search(params, candidate_T_values, runs=20, seed_start=0, cache_path=None, jobs=1):
+        return Dummy(threshold_T=10.0 + params.n)
+
+    monkeypatch.setattr(analysis, "search_threshold_over_candidates", fake_search)
+    sweep = analysis.sweep_number_of_loci(params, [1, 2, 3], [10, 20, 30], runs=2, seed_start=1, jobs=3)
+    assert [row.threshold_T for row in sweep.rows] == [11.0, 12.0, 13.0]
+    assert sweep.fit is not None
+
+
+def test_run_config_loci_regression_mode(monkeypatch):
+    @dataclass(frozen=True)
+    class DummyFit:
+        intercept_c0: float = 5.0
+        slope_c1: float = 2.0
+        r_squared: float = 1.0
+        points_used: int = 3
+
+    class DummySweep:
+        rows = [
+            analysis.LocusThresholdRow(n=1, threshold_T=7.0, accepted=True),
+            analysis.LocusThresholdRow(n=2, threshold_T=9.0, accepted=True),
+            analysis.LocusThresholdRow(n=3, threshold_T=11.0, accepted=True),
+        ]
+        fit = DummyFit()
+
+    monkeypatch.setattr(api, "sweep_number_of_loci", lambda *args, **kwargs: DummySweep())
+    config = api.Track1RunConfig(
+        mode="loci_regression",
+        loci_values=[1, 2, 3],
+        t_start=10,
+        t_stop=30,
+        t_step=10,
+        runs=2,
+    )
+    payload = api.run_config(config)
+    assert payload["mode"] == "loci_regression"
+    assert payload["loci_values"] == [1, 2, 3]
+    assert payload["fit"]["slope_c1"] == 2.0
+
+
+def test_summarize_tracking_detects_post_initial_nonzero_alleles():
+    summaries = [
+        GenerationSummary(
+            t=-2,
+            N=10,
+            female_fraction=0.5,
+            male_count=5,
+            female_count=5,
+            fecundity=1.0,
+            mean_fitness=1.0,
+            mean_expected_female_productivity=1.0,
+            target_value=-0.1,
+            mean_allele_value=0.0,
+            mean_genotype_value=0.0,
+            mean_tracking_gap=0.1,
+            paper_M=0.05,
+            expected_mutations_current_N=0.0001,
+            realized_mutation_count=0,
+            realized_mutation_rate_per_allele=0.0,
+            birth_count=0,
+            surviving_offspring_count=0,
+            ne_approx=5.0,
+            extinct=False,
+        ),
+        GenerationSummary(
+            t=-1,
+            N=10,
+            female_fraction=0.5,
+            male_count=5,
+            female_count=5,
+            fecundity=1.0,
+            mean_fitness=1.0,
+            mean_expected_female_productivity=1.0,
+            target_value=-0.05,
+            mean_allele_value=0.2,
+            mean_genotype_value=0.2,
+            mean_tracking_gap=0.25,
+            paper_M=0.05,
+            expected_mutations_current_N=0.0001,
+            realized_mutation_count=1,
+            realized_mutation_rate_per_allele=0.05,
+            birth_count=2,
+            surviving_offspring_count=1,
+            ne_approx=5.0,
+            extinct=False,
+        ),
+    ]
+    tracking = analysis.summarize_tracking(summaries)
+    assert tracking.extinction_occurred is False
+    assert tracking.first_extinction_t is None
+    assert tracking.first_nonzero_allele_t == -1
+    assert tracking.last_nonzero_allele_t == -1
+    assert tracking.stayed_zero_after_initialization is False
+
+
+def test_summarize_tracking_detects_extinction_time():
+    summaries = [
+        GenerationSummary(
+            t=0,
+            N=10,
+            female_fraction=0.5,
+            male_count=5,
+            female_count=5,
+            fecundity=1.0,
+            mean_fitness=1.0,
+            mean_expected_female_productivity=1.0,
+            target_value=0.0,
+            mean_allele_value=0.0,
+            mean_genotype_value=0.0,
+            mean_tracking_gap=0.0,
+            paper_M=0.05,
+            expected_mutations_current_N=0.0001,
+            realized_mutation_count=0,
+            realized_mutation_rate_per_allele=0.0,
+            birth_count=0,
+            surviving_offspring_count=0,
+            ne_approx=5.0,
+            extinct=False,
+        ),
+        GenerationSummary(
+            t=1,
+            N=0,
+            female_fraction=0.0,
+            male_count=0,
+            female_count=0,
+            fecundity=0.0,
+            mean_fitness=0.0,
+            mean_expected_female_productivity=0.0,
+            target_value=0.1,
+            mean_allele_value=0.0,
+            mean_genotype_value=0.0,
+            mean_tracking_gap=-0.1,
+            paper_M=0.05,
+            expected_mutations_current_N=0.0,
+            realized_mutation_count=0,
+            realized_mutation_rate_per_allele=0.0,
+            birth_count=0,
+            surviving_offspring_count=0,
+            ne_approx=0.0,
+            extinct=True,
+        ),
+    ]
+    tracking = analysis.summarize_tracking(summaries)
+    assert tracking.extinction_occurred is True
+    assert tracking.first_extinction_t == 1
diff --git a/tests/test_track1_api.py b/tests/test_track1_api.py
index d04d960..410e9cc 100644
--- a/tests/test_track1_api.py
+++ b/tests/test_track1_api.py
@@ -7,7 +7,7 @@ SRC_DIR = ROOT / "src"
 if str(SRC_DIR) not in sys.path:
     sys.path.insert(0, str(SRC_DIR))
 
-from renunney import track1_api as api
+import renunney.track1_api as api
 
 
 def test_run_config_simulate_mode_returns_contract():