From 83350e68c89d7ab6c2b6a8519a1f4b848e70e2bc Mon Sep 17 00:00:00 2001 From: Codex Date: Sat, 11 Apr 2026 06:31:39 -0400 Subject: [PATCH] Migrate Track 1 threshold layer into renunney --- README.md | 2 + docs/MIGRATION.md | 10 +- docs/WORKFLOW.md | 10 +- src/renunney/__init__.py | 16 ++ src/renunney/track1_analysis.py | 2 +- src/renunney/track1_api.py | 2 +- src/renunney/track1_threshold.py | 262 +++++++++++++++++++++++++++++++ tests/test_track1_threshold.py | 82 ++++++++++ 8 files changed, 375 insertions(+), 11 deletions(-) create mode 100644 src/renunney/track1_threshold.py create mode 100644 tests/test_track1_threshold.py diff --git a/README.md b/README.md index ccef234..283e14c 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ plane and the Track 1 runner/API boundary are now local to `renunney`. - local paper-scale Figure 1 submission configs, - a local Track 1 runner and config/API layer, - a local Track 1 analysis layer for tracking summaries and loci-regression, +- a local Track 1 threshold/search layer for Nunney-style threshold checks, - a Makefile for common tasks, - migration notes for pulling code into this repo in stages. @@ -84,6 +85,7 @@ The current state is split: - orchestration control plane: local to `renunney` - Track 1 runner and config/API layer: local to `renunney` - Track 1 analysis layer: local to `renunney` +- Track 1 threshold/search layer: local to `renunney` - Track 1 simulation backend: still in the older `cost_of_substitution` directory and imported through the local compatibility layer diff --git a/docs/MIGRATION.md b/docs/MIGRATION.md index e245614..ee884e7 100644 --- a/docs/MIGRATION.md +++ b/docs/MIGRATION.md @@ -29,17 +29,19 @@ Operational code still lives in: - `src/renunney/track1_api.py` 4. Track 1 analysis boundary has been migrated locally: - `src/renunney/track1_analysis.py` -5. Keep the Track 1 simulation backend in the legacy path until real multi-host runs are stable. -6. 
Migrate the Track 1 simulation core after the runner path is stable: +5. Track 1 threshold/search boundary has been migrated locally: + - `src/renunney/track1_threshold.py` +6. Keep the Track 1 simulation backend in the legacy path until real multi-host runs are stable. +7. Migrate the Track 1 simulation core after the runner path is stable: - `python/track1_reference.py` - `python/track1_threshold.py` - `python/track1_analysis.py` -7. Migrate report, dataset, fit, and orchestration-adjacent Track 1 modules after the kernel boundary is stable: +8. Migrate report, dataset, fit, and orchestration-adjacent Track 1 modules after the kernel boundary is stable: - `python/track1_report.py` - `python/track1_dataset.py` - `python/track1_fit.py` - `python/track1_extinction.py` -8. Migrate docs and example configs last, after path references are updated. +9. Migrate docs and example configs last, after path references are updated. ## Constraint diff --git a/docs/WORKFLOW.md b/docs/WORKFLOW.md index 300aea4..c3a71d9 100644 --- a/docs/WORKFLOW.md +++ b/docs/WORKFLOW.md @@ -47,8 +47,8 @@ make status ## Current Assumption The Makefile now drives the local orchestration code in `renunney`, while the -Track 1 runner/API boundary and analysis layer are also local to `renunney`. -The simulation kernel is still imported from the legacy `cost_of_substitution` -directory through the compatibility layer in `src/renunney/legacy.py`. The -paper-scale Figure 1 configs used for submission are now local to -`renunney/config`. +Track 1 runner/API boundary, analysis layer, and threshold/search layer are +also local to `renunney`. The simulation kernel is still imported from the +legacy `cost_of_substitution` directory through the compatibility layer in +`src/renunney/legacy.py`. The paper-scale Figure 1 configs used for submission +are now local to `renunney/config`. 
diff --git a/src/renunney/__init__.py b/src/renunney/__init__.py index 6aed776..eeca0b9 100644 --- a/src/renunney/__init__.py +++ b/src/renunney/__init__.py @@ -28,6 +28,15 @@ from .track1_analysis import ( sweep_number_of_loci, ) from .track1_api import Track1RunConfig, config_from_mapping, load_config, run_config, save_payload +from .track1_threshold import ( + ThresholdCheck, + ThresholdSearchResult, + evaluate_threshold_candidate, + nunney_threshold_accepts, + published_threshold_accepts, + run_extinction_check, + search_threshold_over_candidates, +) __all__ = [ "ClaimedJob", @@ -52,13 +61,20 @@ __all__ = [ "run_worker_loop", "submit_job_manifest", "submit_track1_figure1_jobs", + "ThresholdCheck", + "ThresholdSearchResult", "TrackingSummary", "Track1RunConfig", "config_from_mapping", + "evaluate_threshold_candidate", "fit_linear_cost_by_loci", "load_config", + "nunney_threshold_accepts", + "published_threshold_accepts", "run_config", + "run_extinction_check", "save_payload", + "search_threshold_over_candidates", "summarize_tracking", "sweep_number_of_loci", ] diff --git a/src/renunney/track1_analysis.py b/src/renunney/track1_analysis.py index 6101f21..ab8d2df 100644 --- a/src/renunney/track1_analysis.py +++ b/src/renunney/track1_analysis.py @@ -15,11 +15,11 @@ from typing import Iterable, Optional import numpy as np from .legacy import ensure_legacy_python_path +from .track1_threshold import ThresholdSearchResult, search_threshold_over_candidates ensure_legacy_python_path() from track1_reference import GenerationSummary, Track1Parameters -from track1_threshold import ThresholdSearchResult, search_threshold_over_candidates @dataclass(frozen=True) diff --git a/src/renunney/track1_api.py b/src/renunney/track1_api.py index dce9a87..8fb9a9d 100644 --- a/src/renunney/track1_api.py +++ b/src/renunney/track1_api.py @@ -16,6 +16,7 @@ from typing import Any, Optional from .legacy import ensure_legacy_python_path from .track1_analysis import summarize_tracking, 
"""
track1_threshold.py

Local Track 1 threshold-search layer for renunney.

This stage keeps the simulation kernel in the legacy tree while moving the
historical threshold heuristic inward.

The optional JSON cache maps a canonical parameter key to the extinction
count of one completed check.  It is best-effort: writes are atomic and
entries written by other processes are merged just before saving, but no
file lock is taken, so two writers finishing at the same instant can still
lose one entry (which is then simply recomputed later).
"""

from __future__ import annotations

from concurrent.futures import ProcessPoolExecutor
from dataclasses import dataclass
import json
import os
from pathlib import Path
from typing import Iterable, Optional
import uuid
import warnings

from .legacy import ensure_legacy_python_path

ensure_legacy_python_path()

from track1_reference import Track1Parameters, simulate_run


# (multiplier, seed offset) for the three checks above the candidate T.
# Offsets are spaced by 1000, so `runs` above 1000 would reuse seeds
# between neighbouring checks.
_HIGHER_CHECKS = ((1.02, 1000), (1.05, 2000), (1.10, 3000))
# Seed offset used when a single failed higher check is re-run.
_RETEST_SEED_OFFSET = 4000
# Seed spacing between successive candidates in a search.
_CANDIDATE_SEED_STRIDE = 10000


@dataclass(frozen=True)
class ThresholdCheck:
    """Summary for one T value under the Track 1 heuristic."""

    T: float  # T value the check was requested at (before rounding)
    runs: int  # number of simulations performed
    extinctions: int  # how many of those runs ended extinct

    @property
    def survived_all(self) -> bool:
        """True when no run at this T went extinct."""
        return self.extinctions == 0


@dataclass(frozen=True)
class ThresholdSearchResult:
    """Result of Nunney-style threshold search."""

    threshold_T: float  # candidate T that was evaluated
    baseline_check: ThresholdCheck  # check at T itself
    check_1_02: ThresholdCheck  # check at 1.02 * T
    check_1_05: ThresholdCheck  # check at 1.05 * T
    check_1_10: ThresholdCheck  # check at 1.10 * T
    retest_check: Optional[ThresholdCheck]  # re-run of the single failed higher check, if any


def _cache_key(params: Track1Parameters, T_value: float, runs: int, seed_start: int) -> str:
    """Return the canonical JSON string identifying one extinction check."""
    return json.dumps(
        {
            "K": params.K,
            "N0": params.N0,
            "n": params.n,
            "u": params.u,
            "R": params.R,
            "T_value": T_value,
            "epochs": params.epochs,
            "p": params.p,
            "a_max": params.a_max,
            "runs": runs,
            "seed_start": seed_start,
        },
        sort_keys=True,
    )


def _load_cache(path: str | Path | None) -> dict:
    """
    Load the check cache from *path*.

    Returns an empty dict when *path* is None or the file does not exist.
    An unreadable cache (truncated, empty, or not a JSON object) is reported
    with a warning and treated as empty rather than aborting the run: the
    cache only saves recomputation and is rewritten on the next save.
    """
    if path is None:
        return {}
    cache_path = Path(path)
    try:
        raw = cache_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return {}
    try:
        cache = json.loads(raw)
    except json.JSONDecodeError as exc:
        warnings.warn(f"Ignoring unreadable threshold cache {cache_path}: {exc}")
        return {}
    if not isinstance(cache, dict):
        warnings.warn(f"Ignoring threshold cache {cache_path}: top-level value is not an object")
        return {}
    return cache


def _save_cache(path: str | Path | None, cache: dict) -> None:
    """
    Write *cache* to *path* atomically; do nothing when *path* is None.

    The payload is written to a uniquely named sibling file and moved into
    place with ``os.replace``, so readers never observe a half-written file
    and an interrupted write leaves the previous cache intact.
    """
    if path is None:
        return
    cache_path = Path(path)
    cache_path.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(cache, indent=2, sort_keys=True) + "\n"
    # Same directory as the target so the final rename stays on one filesystem.
    tmp_path = cache_path.with_name(f"{cache_path.name}.{uuid.uuid4().hex}.tmp")
    try:
        tmp_path.write_text(payload, encoding="utf-8")
        os.replace(tmp_path, cache_path)
    except BaseException:
        # Do not leave stray temp files behind on failure or interruption.
        try:
            tmp_path.unlink()
        except FileNotFoundError:
            pass
        raise


def _single_run_extinct(params: Track1Parameters, sim_T: int, seed: int) -> bool:
    """
    Run one simulation with ``T=sim_T`` and report whether it ended extinct.

    Defined at module level so ``ProcessPoolExecutor`` can dispatch it to
    worker processes.  An empty summary list counts as "not extinct".
    """
    run_params = Track1Parameters(
        K=params.K,
        N0=params.N0,
        n=params.n,
        u=params.u,
        R=params.R,
        T=sim_T,
        epochs=params.epochs,
        p=params.p,
        a_max=params.a_max,
    )
    summaries = simulate_run(run_params, seed=seed)
    return bool(summaries and summaries[-1].extinct)


def run_extinction_check(
    params: Track1Parameters,
    T_value: float,
    runs: int = 20,
    seed_start: int = 0,
    cache_path: str | Path | None = None,
    jobs: int = 1,
) -> ThresholdCheck:
    """
    Run repeated simulations at one T value.

    Seeds ``seed_start .. seed_start + runs - 1`` are used, one per run.
    With ``jobs > 1`` the runs are spread over up to ``jobs`` worker
    processes.  When *cache_path* is given, a previously stored result for
    the same parameters, T, runs and seed_start is returned without
    simulating, and new results are stored there.

    reconstruction_choice:
    T is rounded to the nearest integer generation count for the simulator.

    Raises:
        ValueError: if ``runs`` is less than 1.  A check with no runs would
            report ``survived_all`` as True without any evidence.
    """
    if runs < 1:
        raise ValueError(f"runs must be at least 1, got {runs}")

    key = _cache_key(params, T_value, runs, seed_start)
    cached = _load_cache(cache_path).get(key)
    if cached is not None:
        return ThresholdCheck(T=cached["T"], runs=cached["runs"], extinctions=cached["extinctions"])

    sim_T = int(round(T_value))
    seeds = [seed_start + run_index for run_index in range(runs)]
    if jobs <= 1 or runs <= 1:
        extinctions = sum(1 for seed in seeds if _single_run_extinct(params, sim_T, seed))
    else:
        worker_count = min(jobs, runs)
        with ProcessPoolExecutor(max_workers=worker_count) as executor:
            extinctions = sum(executor.map(_single_run_extinct, [params] * runs, [sim_T] * runs, seeds))
    result = ThresholdCheck(T=T_value, runs=runs, extinctions=extinctions)

    if cache_path is not None:
        # Re-read instead of reusing the snapshot taken before the simulations:
        # other processes may have stored entries while this check was running,
        # and writing the stale snapshot back would discard them.
        cache = _load_cache(cache_path)
        cache[key] = {"T": result.T, "runs": result.runs, "extinctions": result.extinctions}
        _save_cache(cache_path, cache)
    return result


def nunney_threshold_accepts(
    baseline_check: ThresholdCheck,
    check_1_02: ThresholdCheck,
    check_1_05: ThresholdCheck,
    check_1_10: ThresholdCheck,
) -> tuple[bool, int]:
    """
    Return acceptance and the number of failed higher checks.

    A failed baseline is reported as ``(False, 3)`` regardless of the higher
    checks, so callers never see a failure count of 1 (the retest case) for
    a candidate whose baseline did not survive.
    """

    if not baseline_check.survived_all:
        return False, 3
    higher_checks = (check_1_02, check_1_05, check_1_10)
    failures = sum(1 for check in higher_checks if not check.survived_all)
    return failures == 0, failures


def evaluate_threshold_candidate(
    params: Track1Parameters,
    T_value: float,
    runs: int = 20,
    seed_start: int = 0,
    cache_path: str | Path | None = None,
    jobs: int = 1,
) -> ThresholdSearchResult:
    """
    Evaluate one candidate threshold T using Nunney's published checks.

    Runs the baseline check at T and the checks at 1.02, 1.05 and 1.10 * T,
    each with its own seed offset.  If the baseline survived and exactly one
    higher check failed, that check is re-run once with fresh seeds and
    stored as ``retest_check``; otherwise ``retest_check`` is None.  All four
    primary checks are always run because the result records each of them.
    """

    def check_at(T: float, seed_offset: int) -> ThresholdCheck:
        return run_extinction_check(
            params,
            T,
            runs=runs,
            seed_start=seed_start + seed_offset,
            cache_path=cache_path,
            jobs=jobs,
        )

    # The baseline receives T_value untouched (no multiplication), so the
    # recorded T and the cache key keep the caller's exact value.
    baseline_check = check_at(T_value, 0)
    check_1_02, check_1_05, check_1_10 = (
        check_at(multiplier * T_value, seed_offset) for multiplier, seed_offset in _HIGHER_CHECKS
    )

    _, failures = nunney_threshold_accepts(
        baseline_check=baseline_check,
        check_1_02=check_1_02,
        check_1_05=check_1_05,
        check_1_10=check_1_10,
    )

    retest_check: Optional[ThresholdCheck] = None
    if failures == 1:
        failed = next(check for check in (check_1_02, check_1_05, check_1_10) if not check.survived_all)
        retest_check = check_at(failed.T, _RETEST_SEED_OFFSET)

    return ThresholdSearchResult(
        threshold_T=T_value,
        baseline_check=baseline_check,
        check_1_02=check_1_02,
        check_1_05=check_1_05,
        check_1_10=check_1_10,
        retest_check=retest_check,
    )


def published_threshold_accepts(result: ThresholdSearchResult) -> bool:
    """
    True if the candidate passes Nunney's published criterion.

    The candidate passes when the baseline and all three higher checks
    survive, or when exactly one higher check failed and its retest
    survived.  A single failure without a recorded retest is rejected.
    """

    accepted, failures = nunney_threshold_accepts(
        baseline_check=result.baseline_check,
        check_1_02=result.check_1_02,
        check_1_05=result.check_1_05,
        check_1_10=result.check_1_10,
    )
    if failures != 1:
        # Either everything survived, or the baseline / several checks failed.
        return accepted
    if result.retest_check is None:
        return False
    return result.retest_check.survived_all


def search_threshold_over_candidates(
    params: Track1Parameters,
    candidate_T_values: Iterable[float],
    runs: int = 20,
    seed_start: int = 0,
    cache_path: str | Path | None = None,
    jobs: int = 1,
) -> Optional[ThresholdSearchResult]:
    """
    Search candidate T values from below and return the first accepted threshold.

    Candidates are evaluated in the order given (they are not sorted here),
    each with its own block of seeds.  Returns None when no candidate is
    accepted.
    """

    for index, T_value in enumerate(candidate_T_values):
        result = evaluate_threshold_candidate(
            params=params,
            T_value=T_value,
            runs=runs,
            seed_start=seed_start + (index * _CANDIDATE_SEED_STRIDE),
            cache_path=cache_path,
            jobs=jobs,
        )
        if published_threshold_accepts(result):
            return result
    return None
thr.published_threshold_accepts(result) is False + + +def test_search_threshold_over_candidates_uses_first_accepted(monkeypatch): + params = Track1Parameters(K=5000, N0=20, n=1, u=5e-6, R=10.0, T=20) + + def fake_eval(params, T_value, runs=20, seed_start=0, cache_path=None, jobs=1): + if T_value == 60: + return thr.ThresholdSearchResult( + threshold_T=60.0, + baseline_check=thr.ThresholdCheck(T=60.0, runs=runs, extinctions=1), + check_1_02=thr.ThresholdCheck(T=61.2, runs=runs, extinctions=0), + check_1_05=thr.ThresholdCheck(T=63.0, runs=runs, extinctions=0), + check_1_10=thr.ThresholdCheck(T=66.0, runs=runs, extinctions=0), + retest_check=None, + ) + return thr.ThresholdSearchResult( + threshold_T=float(T_value), + baseline_check=thr.ThresholdCheck(T=float(T_value), runs=runs, extinctions=0), + check_1_02=thr.ThresholdCheck(T=1.02 * T_value, runs=runs, extinctions=0), + check_1_05=thr.ThresholdCheck(T=1.05 * T_value, runs=runs, extinctions=0), + check_1_10=thr.ThresholdCheck(T=1.10 * T_value, runs=runs, extinctions=0), + retest_check=None, + ) + + monkeypatch.setattr(thr, "evaluate_threshold_candidate", fake_eval) + result = thr.search_threshold_over_candidates(params, [60, 80, 100], runs=3, seed_start=10) + assert result is not None + assert result.threshold_T == 80.0 + + +def test_run_extinction_check_parallel_matches_serial(): + params = Track1Parameters(K=5000, N0=20, n=1, u=5e-6, R=10.0, T=20) + serial = thr.run_extinction_check(params, T_value=20.0, runs=2, seed_start=5, jobs=1) + parallel = thr.run_extinction_check(params, T_value=20.0, runs=2, seed_start=5, jobs=2) + assert parallel == serial