Migrate Track 1 analysis layer into renunney
This commit is contained in:
parent
a6d1326165
commit
acbb90f452
|
|
@ -22,6 +22,7 @@ plane and the Track 1 runner/API boundary are now local to `renunney`.
|
|||
- a local orchestration CLI and library,
|
||||
- local paper-scale Figure 1 submission configs,
|
||||
- a local Track 1 runner and config/API layer,
|
||||
- a local Track 1 analysis layer for tracking summaries and loci-regression,
|
||||
- a Makefile for common tasks,
|
||||
- migration notes for pulling code into this repo in stages.
|
||||
|
||||
|
|
@ -82,6 +83,7 @@ The current state is split:
|
|||
|
||||
- orchestration control plane: local to `renunney`
|
||||
- Track 1 runner and config/API layer: local to `renunney`
|
||||
- Track 1 analysis layer: local to `renunney`
|
||||
- Track 1 simulation backend: still in the older `cost_of_substitution`
|
||||
directory and imported through the local compatibility layer
|
||||
|
||||
|
|
|
|||
|
|
@ -27,17 +27,19 @@ Operational code still lives in:
|
|||
3. Track 1 runner and API boundary have been migrated locally:
|
||||
- `scripts/run_track1.py`
|
||||
- `src/renunney/track1_api.py`
|
||||
4. Keep the Track 1 simulation backend in the legacy path until real multi-host runs are stable.
|
||||
5. Migrate the Track 1 simulation core after the runner path is stable:
|
||||
4. Track 1 analysis boundary has been migrated locally:
|
||||
- `src/renunney/track1_analysis.py`
|
||||
5. Keep the Track 1 simulation backend in the legacy path until real multi-host runs are stable.
|
||||
6. Migrate the Track 1 simulation core after the runner path is stable:
|
||||
- `python/track1_reference.py`
|
||||
- `python/track1_threshold.py`
|
||||
- `python/track1_analysis.py`
|
||||
6. Migrate report, dataset, fit, and orchestration-adjacent Track 1 modules after the kernel boundary is stable:
|
||||
7. Migrate report, dataset, fit, and orchestration-adjacent Track 1 modules after the kernel boundary is stable:
|
||||
- `python/track1_report.py`
|
||||
- `python/track1_dataset.py`
|
||||
- `python/track1_fit.py`
|
||||
- `python/track1_extinction.py`
|
||||
7. Migrate docs and example configs last, after path references are updated.
|
||||
8. Migrate docs and example configs last, after path references are updated.
|
||||
|
||||
## Constraint
|
||||
|
||||
|
|
|
|||
|
|
@ -47,7 +47,8 @@ make status
|
|||
## Current Assumption
|
||||
|
||||
The Makefile now drives the local orchestration code in `renunney`, while the
|
||||
Track 1 runner/API boundary is also local to `renunney`. The simulation kernel
|
||||
is still imported from the legacy `cost_of_substitution` directory through the
|
||||
compatibility layer in `src/renunney/legacy.py`. The paper-scale Figure 1
|
||||
configs used for submission are now local to `renunney/config`.
|
||||
Track 1 runner/API boundary and analysis layer are also local to `renunney`.
|
||||
The simulation kernel is still imported from the legacy `cost_of_substitution`
|
||||
directory through the compatibility layer in `src/renunney/legacy.py`. The
|
||||
paper-scale Figure 1 configs used for submission are now local to
|
||||
`renunney/config`.
|
||||
|
|
|
|||
|
|
@ -18,6 +18,15 @@ from .orchestration import (
|
|||
submit_job_manifest,
|
||||
submit_track1_figure1_jobs,
|
||||
)
|
||||
from .track1_analysis import (
|
||||
LinearCostFit,
|
||||
LocusThresholdRow,
|
||||
NumberOfLociSweep,
|
||||
TrackingSummary,
|
||||
fit_linear_cost_by_loci,
|
||||
summarize_tracking,
|
||||
sweep_number_of_loci,
|
||||
)
|
||||
from .track1_api import Track1RunConfig, config_from_mapping, load_config, run_config, save_payload
|
||||
|
||||
__all__ = [
|
||||
|
|
@ -35,14 +44,21 @@ __all__ = [
|
|||
"list_job_results",
|
||||
"list_jobs",
|
||||
"load_job_manifest",
|
||||
"LinearCostFit",
|
||||
"LocusThresholdRow",
|
||||
"NumberOfLociSweep",
|
||||
"repo_root",
|
||||
"run_one_job",
|
||||
"run_worker_loop",
|
||||
"submit_job_manifest",
|
||||
"submit_track1_figure1_jobs",
|
||||
"TrackingSummary",
|
||||
"Track1RunConfig",
|
||||
"config_from_mapping",
|
||||
"fit_linear_cost_by_loci",
|
||||
"load_config",
|
||||
"run_config",
|
||||
"save_payload",
|
||||
"summarize_tracking",
|
||||
"sweep_number_of_loci",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,170 @@
|
|||
"""
|
||||
track1_analysis.py
|
||||
|
||||
Local Track 1 analysis helpers for renunney.
|
||||
|
||||
This stage keeps the simulation kernel in the legacy tree while moving the
|
||||
analysis/reporting boundary inward.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Iterable, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
from .legacy import ensure_legacy_python_path
|
||||
|
||||
ensure_legacy_python_path()
|
||||
|
||||
from track1_reference import GenerationSummary, Track1Parameters
|
||||
from track1_threshold import ThresholdSearchResult, search_threshold_over_candidates
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LocusThresholdRow:
    """Outcome of the threshold search for a single number-of-loci value."""

    # Number of loci this row was computed for.
    n: int
    # Threshold T reported by the search; sweep_number_of_loci stores None
    # here when the search returned no result.
    threshold_T: Optional[float]
    # sweep_number_of_loci sets this to True exactly when a result was returned.
    accepted: bool
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LinearCostFit:
    """Coefficients and diagnostics of the straight-line fit C = C0 + n*C1."""

    # Fitted intercept C0.
    intercept_c0: float
    # Fitted slope C1, i.e. the change in C per unit of n.
    slope_c1: float
    # Coefficient of determination of the fit.
    r_squared: float
    # Number of rows that entered the regression.
    points_used: int
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class NumberOfLociSweep:
    """Per-loci-value threshold rows bundled with the regression over them."""

    # One LocusThresholdRow per loci value that was swept.
    rows: list[LocusThresholdRow]
    # Linear fit across the rows; None when no fit was produced.
    fit: Optional[LinearCostFit]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class TrackingSummary:
    """Compact allele-tracking diagnostics derived from one simulation run.

    The field notes below describe how summarize_tracking fills each value.
    """

    # True when at least one generation record was flagged extinct.
    extinction_occurred: bool
    # Time index of the first extinct record, or None if there was none.
    first_extinction_t: Optional[int]
    # Time index of the first record whose mean allele value exceeds the
    # zero tolerance in magnitude, or None if no record does.
    first_nonzero_allele_t: Optional[int]
    # Time index of the last such record, or None if no record does.
    last_nonzero_allele_t: Optional[int]
    # True when every record after the first has a mean allele value within
    # the zero tolerance.
    stayed_zero_after_initialization: bool
    # Largest absolute mean tracking gap over all records.
    max_abs_tracking_gap: float
    # Signed mean tracking gap of the last record.
    final_tracking_gap: float
    # Average of the absolute mean tracking gap over all records.
    mean_abs_tracking_gap: float
    # Mean allele value of the last record.
    final_mean_allele_value: float
    # Target value of the last record.
    final_target_value: float
|
||||
|
||||
|
||||
def summarize_tracking(
    summaries: Iterable[GenerationSummary],
    zero_tol: float = 1.0e-15,
) -> TrackingSummary:
    """Condense per-generation records into a single TrackingSummary.

    Args:
        summaries: Generation records. Each one is read for ``t``,
            ``mean_allele_value``, ``mean_tracking_gap``, ``target_value``
            and ``extinct``. The first element is treated as the
            initialization record and the last one as the final state.
        zero_tol: Absolute tolerance; a mean allele value counts as nonzero
            only when its magnitude is strictly greater than this.

    Returns:
        The condensed diagnostics. For an empty input all time fields are
        None, all numeric fields are 0.0, ``extinction_occurred`` is False
        and ``stayed_zero_after_initialization`` is True.
    """
    records = list(summaries)
    if len(records) == 0:
        return TrackingSummary(
            extinction_occurred=False,
            first_extinction_t=None,
            first_nonzero_allele_t=None,
            last_nonzero_allele_t=None,
            stayed_zero_after_initialization=True,
            max_abs_tracking_gap=0.0,
            final_tracking_gap=0.0,
            mean_abs_tracking_gap=0.0,
            final_mean_allele_value=0.0,
            final_target_value=0.0,
        )

    # Time indices of the records of interest, kept in input order.
    nonzero_times = [rec.t for rec in records if abs(rec.mean_allele_value) > zero_tol]
    extinction_times = [rec.t for rec in records if rec.extinct]

    if nonzero_times:
        first_nonzero_t = int(nonzero_times[0])
        last_nonzero_t = int(nonzero_times[-1])
    else:
        first_nonzero_t = None
        last_nonzero_t = None

    if extinction_times:
        first_extinction_t = int(extinction_times[0])
    else:
        first_extinction_t = None

    # The first record is excluded: only generations after it are checked.
    quiet_after_start = all(
        abs(rec.mean_allele_value) <= zero_tol for rec in records[1:]
    )

    gap_magnitudes = np.array(
        [abs(rec.mean_tracking_gap) for rec in records],
        dtype=float,
    )
    last = records[-1]

    return TrackingSummary(
        extinction_occurred=len(extinction_times) > 0,
        first_extinction_t=first_extinction_t,
        first_nonzero_allele_t=first_nonzero_t,
        last_nonzero_allele_t=last_nonzero_t,
        stayed_zero_after_initialization=quiet_after_start,
        max_abs_tracking_gap=float(np.max(gap_magnitudes)),
        final_tracking_gap=float(last.mean_tracking_gap),
        mean_abs_tracking_gap=float(np.mean(gap_magnitudes)),
        final_mean_allele_value=float(last.mean_allele_value),
        final_target_value=float(last.target_value),
    )
|
||||
|
||||
|
||||
def fit_linear_cost_by_loci(rows: Iterable[LocusThresholdRow]) -> Optional[LinearCostFit]:
    """Fit the straight line threshold_T = C0 + n*C1 through the usable rows.

    Only rows with ``accepted`` set and a non-None ``threshold_T`` take part
    in the regression.

    Args:
        rows: Threshold rows, one per number-of-loci setting.

    Returns:
        The fitted LinearCostFit, or None when the usable rows do not cover
        at least two distinct values of ``n`` (the slope is undetermined in
        that case).
    """
    usable = [row for row in rows if row.accepted and row.threshold_T is not None]
    # A line needs two distinct x positions. Counting distinct n values (not
    # just rows) keeps repeated loci values from reaching polyfit with a
    # rank-deficient design matrix, which would produce an arbitrary slope.
    if len({row.n for row in usable}) < 2:
        return None

    x = np.array([row.n for row in usable], dtype=float)
    y = np.array([row.threshold_T for row in usable], dtype=float)
    slope, intercept = np.polyfit(x, y, 1)
    yhat = intercept + slope * x
    ss_res = float(np.sum((y - yhat) ** 2))
    ss_tot = float(np.sum((y - np.mean(y)) ** 2))
    # Identical thresholds give zero total variance; report a perfect fit
    # instead of dividing by zero.
    r_squared = 1.0 if ss_tot == 0.0 else 1.0 - (ss_res / ss_tot)
    return LinearCostFit(
        intercept_c0=float(intercept),
        slope_c1=float(slope),
        r_squared=float(r_squared),
        points_used=len(usable),
    )
|
||||
|
||||
|
||||
def sweep_number_of_loci(
    params: Track1Parameters,
    loci_values: Iterable[int],
    candidate_T_values: Iterable[float],
    runs: int = 20,
    seed_start: int = 0,
    cache_path: str | None = None,
    jobs: int = 1,
) -> NumberOfLociSweep:
    """Run the threshold search once per loci value and regress the results.

    Args:
        params: Template parameters. ``K``, ``N0``, ``u``, ``R``, ``T``,
            ``epochs``, ``p`` and ``a_max`` are copied into each run; ``n``
            is replaced by the current loci value.
            NOTE(review): any other Track1Parameters field is not carried
            over here - confirm none exist.
        loci_values: Number-of-loci settings to sweep, in order.
        candidate_T_values: Candidate thresholds; materialized once and
            passed to every search.
        runs: Forwarded to the threshold search.
        seed_start: Base seed; the search for the i-th loci value (0-based)
            receives ``seed_start + i * 100000``.
        cache_path: Forwarded to the threshold search.
        jobs: Forwarded to the threshold search.

    Returns:
        A NumberOfLociSweep holding one row per loci value plus the result
        of fit_linear_cost_by_loci over those rows.
    """
    candidates = list(candidate_T_values)
    collected: list[LocusThresholdRow] = []

    for position, loci_count in enumerate(loci_values):
        per_locus_params = Track1Parameters(
            K=params.K,
            N0=params.N0,
            n=loci_count,
            u=params.u,
            R=params.R,
            T=params.T,
            epochs=params.epochs,
            p=params.p,
            a_max=params.a_max,
        )
        found: Optional[ThresholdSearchResult] = search_threshold_over_candidates(
            params=per_locus_params,
            candidate_T_values=candidates,
            runs=runs,
            seed_start=seed_start + (position * 100000),
            cache_path=cache_path,
            jobs=jobs,
        )
        # A missing search result is recorded as a rejected row without a threshold.
        if found is None:
            row = LocusThresholdRow(n=loci_count, threshold_T=None, accepted=False)
        else:
            row = LocusThresholdRow(
                n=loci_count,
                threshold_T=float(found.threshold_T),
                accepted=True,
            )
        collected.append(row)

    return NumberOfLociSweep(rows=collected, fit=fit_linear_cost_by_loci(collected))
|
||||
|
|
@ -15,10 +15,10 @@ from pathlib import Path
|
|||
from typing import Any, Optional
|
||||
|
||||
from .legacy import ensure_legacy_python_path
|
||||
from .track1_analysis import summarize_tracking, sweep_number_of_loci
|
||||
|
||||
ensure_legacy_python_path()
|
||||
|
||||
from track1_analysis import summarize_tracking, sweep_number_of_loci
|
||||
from track1_dataset import generate_extinction_dataset
|
||||
from track1_fit import class_balance, fit_payload_from_jsonl, load_jsonl
|
||||
from track1_reference import Track1Parameters, simulate_run
|
||||
|
|
|
|||
|
|
@ -0,0 +1,179 @@
|
|||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
SRC_DIR = ROOT / "src"
|
||||
if str(SRC_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(SRC_DIR))
|
||||
|
||||
import renunney.track1_analysis as analysis
|
||||
import renunney.track1_api as api
|
||||
|
||||
from track1_reference import GenerationSummary, Track1Parameters
|
||||
|
||||
|
||||
def test_fit_linear_cost_by_loci_recovers_line():
    """Rows lying exactly on T = 10 + 2n must give intercept 10 and slope 2."""
    exact_line = [
        analysis.LocusThresholdRow(n=loci, threshold_T=10.0 + 2.0 * loci, accepted=True)
        for loci in (1, 2, 3)
    ]

    fit = analysis.fit_linear_cost_by_loci(exact_line)

    assert fit is not None
    assert abs(fit.intercept_c0 - 10.0) < 1e-9
    assert abs(fit.slope_c1 - 2.0) < 1e-9
|
||||
|
||||
|
||||
def test_sweep_number_of_loci_uses_search_results(monkeypatch):
    """The sweep must turn each search result into a row and fit the rows."""

    class StubResult:
        # Minimal stand-in exposing only the attribute the sweep reads.
        def __init__(self, threshold_T):
            self.threshold_T = threshold_T

    def fake_search(params, candidate_T_values, runs=20, seed_start=0, cache_path=None, jobs=1):
        # Threshold depends on n so each row can be told apart.
        return StubResult(threshold_T=10.0 + params.n)

    monkeypatch.setattr(analysis, "search_threshold_over_candidates", fake_search)

    base_params = Track1Parameters(K=5000, N0=20, n=1, u=5e-6, R=10.0, T=20)
    sweep = analysis.sweep_number_of_loci(
        base_params,
        [1, 2, 3],
        [10, 20, 30],
        runs=2,
        seed_start=1,
        jobs=3,
    )

    thresholds = [row.threshold_T for row in sweep.rows]
    assert thresholds == [11.0, 12.0, 13.0]
    assert sweep.fit is not None
|
||||
|
||||
|
||||
def test_run_config_loci_regression_mode(monkeypatch):
    """run_config in loci_regression mode must report the sweep's fit."""

    @dataclass(frozen=True)
    class StubFit:
        intercept_c0: float = 5.0
        slope_c1: float = 2.0
        r_squared: float = 1.0
        points_used: int = 3

    class StubSweep:
        # Rows on the line T = 5 + 2n, matching the stub fit above.
        rows = [
            analysis.LocusThresholdRow(n=1, threshold_T=7.0, accepted=True),
            analysis.LocusThresholdRow(n=2, threshold_T=9.0, accepted=True),
            analysis.LocusThresholdRow(n=3, threshold_T=11.0, accepted=True),
        ]
        fit = StubFit()

    def fake_sweep(*args, **kwargs):
        return StubSweep()

    monkeypatch.setattr(api, "sweep_number_of_loci", fake_sweep)

    config = api.Track1RunConfig(
        mode="loci_regression",
        loci_values=[1, 2, 3],
        t_start=10,
        t_stop=30,
        t_step=10,
        runs=2,
    )
    payload = api.run_config(config)

    assert payload["mode"] == "loci_regression"
    assert payload["loci_values"] == [1, 2, 3]
    assert payload["fit"]["slope_c1"] == 2.0
|
||||
|
||||
|
||||
def test_summarize_tracking_detects_post_initial_nonzero_alleles():
    """A nonzero allele mean in the second record must be reported at t=-1."""
    # Fields that are the same in both generation records.
    shared = dict(
        N=10,
        female_fraction=0.5,
        male_count=5,
        female_count=5,
        fecundity=1.0,
        mean_fitness=1.0,
        mean_expected_female_productivity=1.0,
        paper_M=0.05,
        expected_mutations_current_N=0.0001,
        ne_approx=5.0,
        extinct=False,
    )
    initial = GenerationSummary(
        t=-2,
        target_value=-0.1,
        mean_allele_value=0.0,
        mean_genotype_value=0.0,
        mean_tracking_gap=0.1,
        realized_mutation_count=0,
        realized_mutation_rate_per_allele=0.0,
        birth_count=0,
        surviving_offspring_count=0,
        **shared,
    )
    mutated = GenerationSummary(
        t=-1,
        target_value=-0.05,
        mean_allele_value=0.2,
        mean_genotype_value=0.2,
        mean_tracking_gap=0.25,
        realized_mutation_count=1,
        realized_mutation_rate_per_allele=0.05,
        birth_count=2,
        surviving_offspring_count=1,
        **shared,
    )

    tracking = analysis.summarize_tracking([initial, mutated])

    assert tracking.extinction_occurred is False
    assert tracking.first_extinction_t is None
    assert tracking.first_nonzero_allele_t == -1
    assert tracking.last_nonzero_allele_t == -1
    assert tracking.stayed_zero_after_initialization is False
|
||||
|
||||
|
||||
def test_summarize_tracking_detects_extinction_time():
    """An extinct record at t=1 must be reported as the first extinction."""
    alive_fields = dict(
        t=0,
        N=10,
        female_fraction=0.5,
        male_count=5,
        female_count=5,
        fecundity=1.0,
        mean_fitness=1.0,
        mean_expected_female_productivity=1.0,
        target_value=0.0,
        mean_allele_value=0.0,
        mean_genotype_value=0.0,
        mean_tracking_gap=0.0,
        paper_M=0.05,
        expected_mutations_current_N=0.0001,
        realized_mutation_count=0,
        realized_mutation_rate_per_allele=0.0,
        birth_count=0,
        surviving_offspring_count=0,
        ne_approx=5.0,
        extinct=False,
    )
    # The extinct generation differs from the living one only in these fields.
    extinct_fields = {
        **alive_fields,
        "t": 1,
        "N": 0,
        "female_fraction": 0.0,
        "male_count": 0,
        "female_count": 0,
        "fecundity": 0.0,
        "mean_fitness": 0.0,
        "mean_expected_female_productivity": 0.0,
        "target_value": 0.1,
        "mean_tracking_gap": -0.1,
        "expected_mutations_current_N": 0.0,
        "ne_approx": 0.0,
        "extinct": True,
    }
    summaries = [
        GenerationSummary(**alive_fields),
        GenerationSummary(**extinct_fields),
    ]

    tracking = analysis.summarize_tracking(summaries)

    assert tracking.extinction_occurred is True
    assert tracking.first_extinction_t == 1
|
||||
|
|
@ -7,7 +7,7 @@ SRC_DIR = ROOT / "src"
|
|||
if str(SRC_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(SRC_DIR))
|
||||
|
||||
from renunney import track1_api as api
|
||||
import renunney.track1_api as api
|
||||
|
||||
|
||||
def test_run_config_simulate_mode_returns_contract():
|
||||
|
|
|
|||
Loading…
Reference in New Issue