diff --git a/README.md b/README.md index ce1d3d6..c9d8f5f 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ plane and the Track 1 runner/API boundary are now local to `renunney`. - a local Track 1 threshold/search layer for Nunney-style threshold checks, - a local Track 1 simulation kernel, - a local Track 1 report generator, +- a local Track 1 extinction-model data layer, - a Makefile for common tasks, - migration notes for pulling code into this repo in stages. @@ -90,7 +91,8 @@ The current state is split: - Track 1 threshold/search layer: local to `renunney` - Track 1 simulation kernel: local to `renunney` - Track 1 report generator: local to `renunney` -- Track 1 dataset, fit, and extinction-model helpers: still imported +- Track 1 extinction-model data layer: local to `renunney` +- Track 1 dataset and fit helpers: still imported from the older `cost_of_substitution` directory through the local compatibility layer diff --git a/docs/MIGRATION.md b/docs/MIGRATION.md index c94f5b0..e462efc 100644 --- a/docs/MIGRATION.md +++ b/docs/MIGRATION.md @@ -35,12 +35,13 @@ Operational code still lives in: - `src/renunney/track1_reference.py` 7. Track 1 report layer has been migrated locally: - `src/renunney/track1_report.py` -8. Migrate dataset, fit, and orchestration-adjacent Track 1 modules next: +8. Track 1 extinction-model data layer has been migrated locally: + - `src/renunney/track1_extinction.py` +9. Migrate dataset and fit modules next: - `python/track1_dataset.py` - `python/track1_fit.py` - - `python/track1_extinction.py` -9. Reduce or remove the remaining compatibility-layer imports after those modules are local. -10. Migrate docs and example configs last, after path references are updated. +10. Reduce or remove the remaining compatibility-layer imports after those modules are local. +11. Migrate docs and example configs last, after path references are updated. ## Constraint diff --git a/docs/WORKFLOW.md b/docs/WORKFLOW.md index 0a94c21..9a680f2 100644 --- a/docs/WORKFLOW.md +++ b/docs/WORKFLOW.md @@ -48,8 +48,8 @@ make status The Makefile now drives the local orchestration code in `renunney`, while the Track 1 runner/API boundary, analysis layer, threshold/search layer, and -simulation kernel and report generator are also local to `renunney`. The -remaining Track 1 dataset/fit helpers are still imported from the legacy -`cost_of_substitution` directory through the compatibility layer in -`src/renunney/legacy.py`. The paper-scale Figure 1 configs used for submission -are now local to `renunney/config`. +simulation kernel, report generator, and extinction-model data layer are also +local to `renunney`. The remaining Track 1 dataset/fit helpers are still +imported from the legacy `cost_of_substitution` directory through the +compatibility layer in `src/renunney/legacy.py`. The paper-scale Figure 1 +configs used for submission are now local to `renunney/config`. diff --git a/src/renunney/__init__.py b/src/renunney/__init__.py index ff9ea93..107a7d5 100644 --- a/src/renunney/__init__.py +++ b/src/renunney/__init__.py @@ -28,6 +28,13 @@ from .track1_analysis import ( sweep_number_of_loci, ) from .track1_api import Track1RunConfig, config_from_mapping, load_config, run_config, save_payload +from .track1_extinction import ( + ExtinctionGenerationRow, + ExtinctionRunRow, + build_extinction_generation_rows, + build_extinction_run_row, + save_jsonl, +) from .track1_reference import ( GenerationSummary, PopulationState, @@ -106,6 +113,8 @@ __all__ = [ "expected_female_productivity", "expected_mutations_for_population", "evaluate_threshold_candidate", + "ExtinctionGenerationRow", + "ExtinctionRunRow", "female_fecundity", "female_fraction", "fit_linear_cost_by_loci", @@ -114,6 +123,8 @@ __all__ = [ "generate_report_bundle", "initialize_population", "is_extinct", + "build_extinction_generation_rows", + "build_extinction_run_row", "load_config", "nunney_threshold_accepts", "paper_mutation_supply_M", @@ -128,6 +139,7 @@ __all__ = [ "simulate_run", "run_extinction_check", "save_payload", + "save_jsonl", "search_threshold_over_candidates", "summarize_tracking", "summarize_generation", diff --git a/src/renunney/track1_extinction.py b/src/renunney/track1_extinction.py new file mode 100644 index 0000000..570ef13 --- /dev/null +++ b/src/renunney/track1_extinction.py @@ -0,0 +1,259 @@ +""" +track1_extinction.py + +Dataset builders and analysis scaffolding for extinction-risk modeling on top +of the Track 1 simulation output. +""" + +from __future__ import annotations + +from dataclasses import asdict, dataclass +import json +from pathlib import Path +from typing import Iterable + +from .track1_analysis import TrackingSummary, summarize_tracking +from .track1_reference import GenerationSummary, Track1Parameters + + +@dataclass(frozen=True) +class ExtinctionGenerationRow: + """Per-generation covariate row for discrete-time extinction modeling.""" + + seed: int + t: int + K: int + N0: int + n: int + u: float + M: float + R: float + T: int + epochs: int + p: float + N: int + female_count: int + male_count: int + female_fraction: float + fecundity: float + mean_fitness: float + mean_expected_female_productivity: float + target_value: float + mean_allele_value: float + mean_genotype_value: float + mean_tracking_gap: float + abs_tracking_gap: float + birth_count: int + surviving_offspring_count: int + survival_fraction: float + replacement_deficit: float + expected_mutation_count: float + realized_mutation_count: int + mutation_shortfall: float + realized_mutation_rate_per_allele: float + ne_approx: float + extinction_occurred: bool + extinction_next_step: bool + + +@dataclass(frozen=True) +class ExtinctionRunRow: + """Run-level summary row for finite-horizon extinction modeling.""" + + seed: int + K: int + N0: int + n: int + u: float + M: float + R: float + T: int + epochs: int + p: float + generations_recorded: int + extinction_occurred: bool + first_extinction_t: int | None + final_extinct: bool + final_N: int + min_N: int + max_N: int + mean_N: float + final_mean_allele_value: float + final_target_value: float + final_tracking_gap: float + mean_abs_tracking_gap: float + max_abs_tracking_gap: float + first_nonzero_allele_t: int | None + last_nonzero_allele_t: int | None + stayed_zero_after_initialization: bool + first_productivity_below_replacement_t: int | None + fraction_generations_below_replacement: float + longest_zero_mutation_streak: int + cumulative_expected_mutations: float + cumulative_realized_mutations: int + cumulative_mutation_shortfall: float + + +def _survival_fraction(summary: GenerationSummary) -> float: + if summary.birth_count <= 0: + return 0.0 + return float(summary.surviving_offspring_count / summary.birth_count) + + +def _replacement_deficit(summary: GenerationSummary) -> float: + return float(max(0.0, 2.0 - summary.mean_expected_female_productivity)) + + +def _mutation_shortfall(summary: GenerationSummary) -> float: + return float(max(0.0, summary.expected_mutations_current_N - summary.realized_mutation_count)) + + +def build_extinction_generation_rows( + params: Track1Parameters, + summaries: Iterable[GenerationSummary], + seed: int, +) -> list[ExtinctionGenerationRow]: + summary_list = list(summaries) + rows: list[ExtinctionGenerationRow] = [] + for index, summary in enumerate(summary_list): + next_extinct = bool(summary_list[index + 1].extinct) if index + 1 < len(summary_list) else bool(summary.extinct) + rows.append( + ExtinctionGenerationRow( + seed=seed, + t=int(summary.t), + K=params.K, + N0=params.N0, + n=params.n, + u=params.u, + M=params.M, + R=params.R, + T=params.T, + epochs=params.epochs, + p=params.p, + N=int(summary.N), + female_count=int(summary.female_count), + male_count=int(summary.male_count), + female_fraction=float(summary.female_fraction), + fecundity=float(summary.fecundity), + mean_fitness=float(summary.mean_fitness), + mean_expected_female_productivity=float(summary.mean_expected_female_productivity), + target_value=float(summary.target_value), + mean_allele_value=float(summary.mean_allele_value), + mean_genotype_value=float(summary.mean_genotype_value), + mean_tracking_gap=float(summary.mean_tracking_gap), + abs_tracking_gap=float(abs(summary.mean_tracking_gap)), + birth_count=int(summary.birth_count), + surviving_offspring_count=int(summary.surviving_offspring_count), + survival_fraction=_survival_fraction(summary), + replacement_deficit=_replacement_deficit(summary), + expected_mutation_count=float(summary.expected_mutations_current_N), + realized_mutation_count=int(summary.realized_mutation_count), + mutation_shortfall=_mutation_shortfall(summary), + realized_mutation_rate_per_allele=float(summary.realized_mutation_rate_per_allele), + ne_approx=float(summary.ne_approx), + extinction_occurred=bool(summary.extinct), + extinction_next_step=next_extinct, + ) + ) + return rows + + +def build_extinction_run_row( + params: Track1Parameters, + summaries: Iterable[GenerationSummary], + seed: int, +) -> ExtinctionRunRow: + summary_list = list(summaries) + tracking: TrackingSummary = summarize_tracking(summary_list) + if not summary_list: + return ExtinctionRunRow( + seed=seed, + K=params.K, + N0=params.N0, + n=params.n, + u=params.u, + M=params.M, + R=params.R, + T=params.T, + epochs=params.epochs, + p=params.p, + generations_recorded=0, + extinction_occurred=False, + first_extinction_t=None, + final_extinct=True, + final_N=0, + min_N=0, + max_N=0, + mean_N=0.0, + final_mean_allele_value=0.0, + final_target_value=0.0, + final_tracking_gap=0.0, + mean_abs_tracking_gap=0.0, + max_abs_tracking_gap=0.0, + first_nonzero_allele_t=None, + last_nonzero_allele_t=None, + stayed_zero_after_initialization=True, + first_productivity_below_replacement_t=None, + fraction_generations_below_replacement=0.0, + longest_zero_mutation_streak=0, + cumulative_expected_mutations=0.0, + cumulative_realized_mutations=0, + cumulative_mutation_shortfall=0.0, + ) + + below_replacement = [s for s in summary_list if s.mean_expected_female_productivity < 2.0] + first_below_replacement_t = None if not below_replacement else int(below_replacement[0].t) + zero_mutation_streak = 0 + longest_zero_mutation_streak = 0 + for summary in summary_list: + if summary.realized_mutation_count == 0: + zero_mutation_streak += 1 + longest_zero_mutation_streak = max(longest_zero_mutation_streak, zero_mutation_streak) + else: + zero_mutation_streak = 0 + cumulative_expected = float(sum(s.expected_mutations_current_N for s in summary_list)) + cumulative_realized = int(sum(s.realized_mutation_count for s in summary_list)) + cumulative_shortfall = float(sum(_mutation_shortfall(s) for s in summary_list)) + final = summary_list[-1] + return ExtinctionRunRow( + seed=seed, + K=params.K, + N0=params.N0, + n=params.n, + u=params.u, + M=params.M, + R=params.R, + T=params.T, + epochs=params.epochs, + p=params.p, + generations_recorded=len(summary_list), + extinction_occurred=tracking.extinction_occurred, + first_extinction_t=tracking.first_extinction_t, + final_extinct=bool(final.extinct), + final_N=int(final.N), + min_N=min(int(s.N) for s in summary_list), + max_N=max(int(s.N) for s in summary_list), + mean_N=float(sum(s.N for s in summary_list) / len(summary_list)), + final_mean_allele_value=float(tracking.final_mean_allele_value), + final_target_value=float(tracking.final_target_value), + final_tracking_gap=float(tracking.final_tracking_gap), + mean_abs_tracking_gap=float(tracking.mean_abs_tracking_gap), + max_abs_tracking_gap=float(tracking.max_abs_tracking_gap), + first_nonzero_allele_t=tracking.first_nonzero_allele_t, + last_nonzero_allele_t=tracking.last_nonzero_allele_t, + stayed_zero_after_initialization=bool(tracking.stayed_zero_after_initialization), + first_productivity_below_replacement_t=first_below_replacement_t, + fraction_generations_below_replacement=float(len(below_replacement) / len(summary_list)), + longest_zero_mutation_streak=longest_zero_mutation_streak, + cumulative_expected_mutations=cumulative_expected, + cumulative_realized_mutations=cumulative_realized, + cumulative_mutation_shortfall=cumulative_shortfall, + ) + + +def save_jsonl(rows: Iterable[object], path: str | Path) -> None: + out = Path(path) + out.parent.mkdir(parents=True, exist_ok=True) + with out.open("w", encoding="utf-8") as handle: + for row in rows: + handle.write(json.dumps(asdict(row), sort_keys=True) + "\n") diff --git a/tests/test_track1_extinction.py b/tests/test_track1_extinction.py new file mode 100644 index 0000000..8932699 --- /dev/null +++ b/tests/test_track1_extinction.py @@ -0,0 +1,95 @@ +import json +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +SRC_DIR = ROOT / "src" +if str(SRC_DIR) not in sys.path: + sys.path.insert(0, str(SRC_DIR)) + +import renunney.track1_extinction as ext +import renunney.track1_reference as ref + + +def _sample_summaries(): + return [ + ref.GenerationSummary( + t=0, + N=10, + female_fraction=0.5, + male_count=5, + female_count=5, + fecundity=3.0, + mean_fitness=0.8, + mean_expected_female_productivity=2.4, + target_value=0.0, + mean_allele_value=0.2, + mean_genotype_value=0.2, + mean_tracking_gap=0.2, + paper_M=0.05, + expected_mutations_current_N=0.1, + realized_mutation_count=0, + realized_mutation_rate_per_allele=0.0, + birth_count=6, + surviving_offspring_count=4, + ne_approx=5.0, + extinct=False, + ), + ref.GenerationSummary( + t=1, + N=0, + female_fraction=0.0, + male_count=0, + female_count=0, + fecundity=0.0, + mean_fitness=0.0, + mean_expected_female_productivity=0.0, + target_value=0.1, + mean_allele_value=0.0, + mean_genotype_value=0.0, + mean_tracking_gap=-0.1, + paper_M=0.05, + expected_mutations_current_N=0.0, + realized_mutation_count=0, + realized_mutation_rate_per_allele=0.0, + birth_count=0, + surviving_offspring_count=0, + ne_approx=0.0, + extinct=True, + ), + ] + + +def test_build_extinction_generation_rows_derives_expected_fields(): + params = ref.Track1Parameters(K=5000, N0=20, n=1, u=5e-6, R=10.0, T=20) + rows = ext.build_extinction_generation_rows(params, _sample_summaries(), seed=7) + assert len(rows) == 2 + assert rows[0].M == 0.05 + assert rows[0].survival_fraction == 4 / 6 + assert rows[0].replacement_deficit == 0.0 + assert rows[0].mutation_shortfall == 0.1 + assert rows[0].extinction_next_step is True + assert rows[1].extinction_occurred is True + + +def test_build_extinction_run_row_summarizes_extinction_and_mutation_streaks(): + params = ref.Track1Parameters(K=5000, N0=20, n=1, u=5e-6, R=10.0, T=20) + row = ext.build_extinction_run_row(params, _sample_summaries(), seed=7) + assert row.extinction_occurred is True + assert row.first_extinction_t == 1 + assert row.first_productivity_below_replacement_t == 1 + assert row.longest_zero_mutation_streak == 2 + assert row.cumulative_expected_mutations == 0.1 + assert row.cumulative_realized_mutations == 0 + + +def test_save_jsonl_writes_row_data(tmp_path: Path): + params = ref.Track1Parameters(K=5000, N0=20, n=1, u=5e-6, R=10.0, T=20) + row = ext.build_extinction_run_row(params, _sample_summaries(), seed=7) + path = tmp_path / "runs.jsonl" + ext.save_jsonl([row], path) + lines = path.read_text(encoding="utf-8").splitlines() + assert len(lines) == 1 + parsed = json.loads(lines[0]) + assert parsed["seed"] == 7 + assert parsed["extinction_occurred"] is True