Migrate Track 1 analysis layer into renunney

This commit is contained in:
Codex 2026-04-11 06:28:41 -04:00
parent a6d1326165
commit acbb90f452
8 changed files with 380 additions and 10 deletions

View File

@ -22,6 +22,7 @@ plane and the Track 1 runner/API boundary are now local to `renunney`.
- a local orchestration CLI and library,
- local paper-scale Figure 1 submission configs,
- a local Track 1 runner and config/API layer,
- a local Track 1 analysis layer for tracking summaries and loci-regression,
- a Makefile for common tasks,
- migration notes for pulling code into this repo in stages.
@ -82,6 +83,7 @@ The current state is split:
- orchestration control plane: local to `renunney`
- Track 1 runner and config/API layer: local to `renunney`
- Track 1 analysis layer: local to `renunney`
- Track 1 simulation backend: still in the older `cost_of_substitution`
directory and imported through the local compatibility layer

View File

@ -27,17 +27,19 @@ Operational code still lives in:
3. Track 1 runner and API boundary have been migrated locally:
- `scripts/run_track1.py`
- `src/renunney/track1_api.py`
4. Keep the Track 1 simulation backend in the legacy path until real multi-host runs are stable.
5. Migrate the Track 1 simulation core after the runner path is stable:
4. Track 1 analysis boundary has been migrated locally:
- `src/renunney/track1_analysis.py`
5. Keep the Track 1 simulation backend in the legacy path until real multi-host runs are stable.
6. Migrate the Track 1 simulation core after the runner path is stable:
- `python/track1_reference.py`
- `python/track1_threshold.py`
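- `python/track1_analysis.py` (legacy copy; the analysis boundary itself is already local as `src/renunney/track1_analysis.py`, see step 4)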
6. Migrate report, dataset, fit, and orchestration-adjacent Track 1 modules after the kernel boundary is stable:
7. Migrate report, dataset, fit, and orchestration-adjacent Track 1 modules after the kernel boundary is stable:
- `python/track1_report.py`
- `python/track1_dataset.py`
- `python/track1_fit.py`
- `python/track1_extinction.py`
7. Migrate docs and example configs last, after path references are updated.
8. Migrate docs and example configs last, after path references are updated.
## Constraint

View File

@ -47,7 +47,8 @@ make status
## Current Assumption
The Makefile now drives the local orchestration code in `renunney`, while the
Track 1 runner/API boundary is also local to `renunney`. The simulation kernel
is still imported from the legacy `cost_of_substitution` directory through the
compatibility layer in `src/renunney/legacy.py`. The paper-scale Figure 1
configs used for submission are now local to `renunney/config`.
Track 1 runner/API boundary and analysis layer are also local to `renunney`.
The simulation kernel is still imported from the legacy `cost_of_substitution`
directory through the compatibility layer in `src/renunney/legacy.py`. The
paper-scale Figure 1 configs used for submission are now local to
`renunney/config`.

View File

@ -18,6 +18,15 @@ from .orchestration import (
submit_job_manifest,
submit_track1_figure1_jobs,
)
from .track1_analysis import (
LinearCostFit,
LocusThresholdRow,
NumberOfLociSweep,
TrackingSummary,
fit_linear_cost_by_loci,
summarize_tracking,
sweep_number_of_loci,
)
from .track1_api import Track1RunConfig, config_from_mapping, load_config, run_config, save_payload
__all__ = [
@ -35,14 +44,21 @@ __all__ = [
"list_job_results",
"list_jobs",
"load_job_manifest",
"LinearCostFit",
"LocusThresholdRow",
"NumberOfLociSweep",
"repo_root",
"run_one_job",
"run_worker_loop",
"submit_job_manifest",
"submit_track1_figure1_jobs",
"TrackingSummary",
"Track1RunConfig",
"config_from_mapping",
"fit_linear_cost_by_loci",
"load_config",
"run_config",
"save_payload",
"summarize_tracking",
"sweep_number_of_loci",
]

View File

@ -0,0 +1,170 @@
"""
track1_analysis.py
Local Track 1 analysis helpers for renunney.
This stage keeps the simulation kernel in the legacy tree while moving the
analysis/reporting boundary inward.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Iterable, Optional
import numpy as np
from .legacy import ensure_legacy_python_path
ensure_legacy_python_path()
from track1_reference import GenerationSummary, Track1Parameters
from track1_threshold import ThresholdSearchResult, search_threshold_over_candidates
@dataclass(frozen=True)
class LocusThresholdRow:
    """One row of a number-of-loci sweep: the threshold found for a given n.

    Rows are immutable. ``fit_linear_cost_by_loci`` only uses rows whose
    ``accepted`` flag is true and whose ``threshold_T`` is not None.
    """

    # Number of loci this row was computed for.
    n: int
    # Threshold T reported for this n, or None when no threshold is available.
    threshold_T: Optional[float]
    # True when the row carries a usable search result.
    accepted: bool
@dataclass(frozen=True)
class LinearCostFit:
    """Result of the straight-line regression C = C0 + n*C1.

    Instances are immutable value objects.
    """

    # Fitted intercept C0 (value of the line at n = 0).
    intercept_c0: float
    # Fitted slope C1 (change per additional locus).
    slope_c1: float
    # Coefficient of determination of the fit.
    r_squared: float
    # Number of data points that entered the regression.
    points_used: int
@dataclass(frozen=True)
class NumberOfLociSweep:
    """Bundle of per-n threshold rows together with the regression over them.

    The dataclass is frozen, so the attributes cannot be rebound; the
    ``rows`` list object itself is still an ordinary mutable list.
    """

    # One row per number-of-loci value that was swept.
    rows: list[LocusThresholdRow]
    # Linear fit through the rows, or None when no fit could be produced.
    fit: Optional[LinearCostFit]
@dataclass(frozen=True)
class TrackingSummary:
    """Compact allele-tracking diagnostics distilled from one simulation run.

    Instances are immutable; ``summarize_tracking`` is the producer in this
    module.
    """

    # True when at least one generation was flagged extinct.
    extinction_occurred: bool
    # Time index of the first extinct generation, or None if there was none.
    first_extinction_t: Optional[int]
    # Time index of the first generation with a nonzero mean allele value.
    first_nonzero_allele_t: Optional[int]
    # Time index of the last generation with a nonzero mean allele value.
    last_nonzero_allele_t: Optional[int]
    # True when every generation after the first kept a zero mean allele value.
    stayed_zero_after_initialization: bool
    # Largest absolute mean tracking gap seen over the run.
    max_abs_tracking_gap: float
    # Mean tracking gap of the final generation (signed).
    final_tracking_gap: float
    # Average of the absolute mean tracking gaps over the run.
    mean_abs_tracking_gap: float
    # Mean allele value of the final generation.
    final_mean_allele_value: float
    # Target value of the final generation.
    final_target_value: float
def summarize_tracking(
    summaries: Iterable[GenerationSummary],
    zero_tol: float = 1.0e-15,
) -> TrackingSummary:
    """Condense per-generation records of one run into a TrackingSummary.

    Args:
        summaries: Generation records in run order. Each record must expose
            ``t``, ``mean_allele_value``, ``mean_tracking_gap``,
            ``target_value`` and ``extinct``. The iterable is consumed once.
        zero_tol: A mean allele value counts as nonzero only when its
            absolute value is strictly greater than this tolerance.

    Returns:
        The condensed diagnostics. For empty input a neutral summary is
        returned: no extinction, no nonzero alleles, the stayed-zero flag set
        and every numeric field 0.0.
    """
    records = list(summaries)
    if len(records) == 0:
        return TrackingSummary(
            extinction_occurred=False,
            first_extinction_t=None,
            first_nonzero_allele_t=None,
            last_nonzero_allele_t=None,
            stayed_zero_after_initialization=True,
            max_abs_tracking_gap=0.0,
            final_tracking_gap=0.0,
            mean_abs_tracking_gap=0.0,
            final_mean_allele_value=0.0,
            final_target_value=0.0,
        )

    allele_magnitudes = [abs(record.mean_allele_value) for record in records]
    nonzero_times = [
        record.t
        for record, magnitude in zip(records, allele_magnitudes)
        if magnitude > zero_tol
    ]
    extinction_times = [record.t for record in records if record.extinct]

    # The first record is treated as the initial state and is exempt from
    # the stayed-zero check; only later generations are inspected.
    stayed_zero = all(magnitude <= zero_tol for magnitude in allele_magnitudes[1:])

    gap_magnitudes = np.abs(
        np.asarray([record.mean_tracking_gap for record in records], dtype=float)
    )
    last_record = records[-1]

    return TrackingSummary(
        extinction_occurred=len(extinction_times) > 0,
        first_extinction_t=int(extinction_times[0]) if extinction_times else None,
        first_nonzero_allele_t=int(nonzero_times[0]) if nonzero_times else None,
        last_nonzero_allele_t=int(nonzero_times[-1]) if nonzero_times else None,
        stayed_zero_after_initialization=stayed_zero,
        max_abs_tracking_gap=float(gap_magnitudes.max()),
        final_tracking_gap=float(last_record.mean_tracking_gap),
        mean_abs_tracking_gap=float(gap_magnitudes.mean()),
        final_mean_allele_value=float(last_record.mean_allele_value),
        final_target_value=float(last_record.target_value),
    )
def fit_linear_cost_by_loci(rows: Iterable[LocusThresholdRow]) -> Optional[LinearCostFit]:
    """Fit the line C = C0 + n*C1 through the usable threshold rows.

    A row is usable when ``accepted`` is true and ``threshold_T`` is not
    None; all other rows are ignored.

    Args:
        rows: Threshold rows to regress over. The iterable is consumed once.

    Returns:
        The fitted intercept, slope, R^2 and number of points used, or None
        when the usable rows cover fewer than two distinct ``n`` values, in
        which case no line is determined.
    """
    usable = [row for row in rows if row.accepted and row.threshold_T is not None]
    x = np.array([row.n for row in usable], dtype=float)
    # A straight line needs two distinct abscissae. Checking the point count
    # alone would let repeated n values through to a rank-deficient polyfit,
    # whose slope is meaningless.
    if np.unique(x).size < 2:
        return None
    y = np.array([row.threshold_T for row in usable], dtype=float)
    slope, intercept = np.polyfit(x, y, 1)
    residuals = y - (intercept + slope * x)
    ss_res = float(np.sum(residuals ** 2))
    ss_tot = float(np.sum((y - np.mean(y)) ** 2))
    # Constant y has zero total variance; report a perfect fit instead of 0/0.
    r_squared = 1.0 if ss_tot == 0.0 else 1.0 - (ss_res / ss_tot)
    return LinearCostFit(
        intercept_c0=float(intercept),
        slope_c1=float(slope),
        r_squared=float(r_squared),
        points_used=len(usable),
    )
def sweep_number_of_loci(
    params: Track1Parameters,
    loci_values: Iterable[int],
    candidate_T_values: Iterable[float],
    runs: int = 20,
    seed_start: int = 0,
    cache_path: str | None = None,
    jobs: int = 1,
) -> NumberOfLociSweep:
    """Run the threshold search once per loci count and regress the results.

    Args:
        params: Template parameters; ``n`` is replaced by each swept value.
        loci_values: Numbers of loci to sweep, processed in iteration order.
        candidate_T_values: Candidate T values handed to every search. They
            are materialised once so a one-shot iterable can be reused.
        runs: Forwarded unchanged to the threshold search.
        seed_start: Base seed; the search at position ``i`` of the sweep
            receives ``seed_start + i * 100000``.
        cache_path: Forwarded unchanged to the threshold search.
        jobs: Forwarded unchanged to the threshold search.

    Returns:
        One row per swept loci count plus the linear fit over those rows
        (None when ``fit_linear_cost_by_loci`` declines to fit).
    """
    # Only these fields are copied from the template, besides the swept ``n``.
    # NOTE(review): confirm Track1Parameters has no further fields that
    # should be propagated as well.
    carried_fields = ("K", "N0", "u", "R", "T", "epochs", "p", "a_max")
    candidates = list(candidate_T_values)
    collected: list[LocusThresholdRow] = []

    for position, loci_count in enumerate(loci_values):
        carried = {name: getattr(params, name) for name in carried_fields}
        per_loci_params = Track1Parameters(n=loci_count, **carried)
        outcome: Optional[ThresholdSearchResult] = search_threshold_over_candidates(
            params=per_loci_params,
            candidate_T_values=candidates,
            runs=runs,
            # Widely spaced seed blocks, one per swept loci count.
            seed_start=seed_start + (position * 100000),
            cache_path=cache_path,
            jobs=jobs,
        )
        if outcome is None:
            row = LocusThresholdRow(n=loci_count, threshold_T=None, accepted=False)
        else:
            row = LocusThresholdRow(
                n=loci_count,
                threshold_T=float(outcome.threshold_T),
                accepted=True,
            )
        collected.append(row)

    fitted = fit_linear_cost_by_loci(collected)
    return NumberOfLociSweep(rows=collected, fit=fitted)

View File

@ -15,10 +15,10 @@ from pathlib import Path
from typing import Any, Optional
from .legacy import ensure_legacy_python_path
from .track1_analysis import summarize_tracking, sweep_number_of_loci
ensure_legacy_python_path()
from track1_analysis import summarize_tracking, sweep_number_of_loci
from track1_dataset import generate_extinction_dataset
from track1_fit import class_balance, fit_payload_from_jsonl, load_jsonl
from track1_reference import Track1Parameters, simulate_run

View File

@ -0,0 +1,179 @@
import sys
from dataclasses import dataclass
from pathlib import Path
# Directory one level above the folder that contains this test file.
ROOT = Path(__file__).resolve().parents[1]
SRC_DIR = ROOT / "src"
# Put <ROOT>/src at the front of sys.path (once) so the `renunney` imports
# below resolve against the source tree.
if str(SRC_DIR) not in sys.path:
    sys.path.insert(0, str(SRC_DIR))
import renunney.track1_analysis as analysis
import renunney.track1_api as api
from track1_reference import GenerationSummary, Track1Parameters
def test_fit_linear_cost_by_loci_recovers_line():
    """Points lying exactly on T = 10 + 2n must be fitted back to that line."""
    rows = [
        analysis.LocusThresholdRow(n=1, threshold_T=12.0, accepted=True),
        analysis.LocusThresholdRow(n=2, threshold_T=14.0, accepted=True),
        analysis.LocusThresholdRow(n=3, threshold_T=16.0, accepted=True),
    ]
    fit = analysis.fit_linear_cost_by_loci(rows)
    assert fit is not None
    assert abs(fit.intercept_c0 - 10.0) < 1e-9
    assert abs(fit.slope_c1 - 2.0) < 1e-9
    # The inputs are exactly collinear, so the fit must explain them fully
    # and must have used every row.
    assert abs(fit.r_squared - 1.0) < 1e-9
    assert fit.points_used == 3
def test_sweep_number_of_loci_uses_search_results(monkeypatch):
    """The sweep must call the search once per n and turn results into rows."""
    params = Track1Parameters(K=5000, N0=20, n=1, u=5e-6, R=10.0, T=20)

    class Dummy:
        def __init__(self, threshold_T):
            self.threshold_T = threshold_T

    # Every invocation of the fake is recorded so argument forwarding can be
    # checked, not just the returned thresholds.
    calls = []

    def fake_search(params, candidate_T_values, runs=20, seed_start=0, cache_path=None, jobs=1):
        calls.append(
            {
                "n": params.n,
                "candidates": list(candidate_T_values),
                "runs": runs,
                "seed_start": seed_start,
                "jobs": jobs,
            }
        )
        return Dummy(threshold_T=10.0 + params.n)

    monkeypatch.setattr(analysis, "search_threshold_over_candidates", fake_search)
    sweep = analysis.sweep_number_of_loci(params, [1, 2, 3], [10, 20, 30], runs=2, seed_start=1, jobs=3)

    assert [row.n for row in sweep.rows] == [1, 2, 3]
    assert [row.threshold_T for row in sweep.rows] == [11.0, 12.0, 13.0]
    assert all(row.accepted for row in sweep.rows)
    assert sweep.fit is not None

    # One search per loci value, each with its own seed block and the
    # caller's runs/jobs/candidates passed through unchanged.
    assert [call["n"] for call in calls] == [1, 2, 3]
    assert [call["seed_start"] for call in calls] == [1, 100001, 200001]
    assert all(call["runs"] == 2 for call in calls)
    assert all(call["jobs"] == 3 for call in calls)
    assert all(call["candidates"] == [10, 20, 30] for call in calls)
def test_run_config_loci_regression_mode(monkeypatch):
    """run_config in loci_regression mode reports the sweep's fit in its payload."""

    @dataclass(frozen=True)
    class DummyFit:
        intercept_c0: float = 5.0
        slope_c1: float = 2.0
        r_squared: float = 1.0
        points_used: int = 3

    class DummySweep:
        # Canned sweep result: three accepted rows on the line T = 5 + 2n.
        rows = [
            analysis.LocusThresholdRow(n=loci, threshold_T=threshold, accepted=True)
            for loci, threshold in ((1, 7.0), (2, 9.0), (3, 11.0))
        ]
        fit = DummyFit()

    def fake_sweep(*args, **kwargs):
        # Stand-in for the real sweep; arguments are ignored.
        return DummySweep()

    monkeypatch.setattr(api, "sweep_number_of_loci", fake_sweep)

    config = api.Track1RunConfig(
        mode="loci_regression",
        loci_values=[1, 2, 3],
        t_start=10,
        t_stop=30,
        t_step=10,
        runs=2,
    )
    payload = api.run_config(config)

    assert payload["mode"] == "loci_regression"
    assert payload["loci_values"] == [1, 2, 3]
    assert payload["fit"]["slope_c1"] == 2.0
def test_summarize_tracking_detects_post_initial_nonzero_alleles():
    """A nonzero mean allele value in the second record must be picked up."""
    # Fields that are identical in both generations.
    shared = dict(
        N=10,
        female_fraction=0.5,
        male_count=5,
        female_count=5,
        fecundity=1.0,
        mean_fitness=1.0,
        mean_expected_female_productivity=1.0,
        paper_M=0.05,
        expected_mutations_current_N=0.0001,
        ne_approx=5.0,
        extinct=False,
    )
    # Initial generation: all alleles still zero.
    initial = GenerationSummary(
        t=-2,
        target_value=-0.1,
        mean_allele_value=0.0,
        mean_genotype_value=0.0,
        mean_tracking_gap=0.1,
        realized_mutation_count=0,
        realized_mutation_rate_per_allele=0.0,
        birth_count=0,
        surviving_offspring_count=0,
        **shared,
    )
    # Following generation: a mutation has moved the mean allele value.
    mutated = GenerationSummary(
        t=-1,
        target_value=-0.05,
        mean_allele_value=0.2,
        mean_genotype_value=0.2,
        mean_tracking_gap=0.25,
        realized_mutation_count=1,
        realized_mutation_rate_per_allele=0.05,
        birth_count=2,
        surviving_offspring_count=1,
        **shared,
    )

    tracking = analysis.summarize_tracking([initial, mutated])

    assert tracking.extinction_occurred is False
    assert tracking.first_extinction_t is None
    assert tracking.first_nonzero_allele_t == -1
    assert tracking.last_nonzero_allele_t == -1
    assert tracking.stayed_zero_after_initialization is False
def test_summarize_tracking_detects_extinction_time():
    """The time of the first extinct record must be reported."""
    # Fields that are identical in both generations.
    shared = dict(
        mean_allele_value=0.0,
        mean_genotype_value=0.0,
        paper_M=0.05,
        realized_mutation_count=0,
        realized_mutation_rate_per_allele=0.0,
        birth_count=0,
        surviving_offspring_count=0,
    )
    # Generation 0: population still alive.
    alive = GenerationSummary(
        t=0,
        N=10,
        female_fraction=0.5,
        male_count=5,
        female_count=5,
        fecundity=1.0,
        mean_fitness=1.0,
        mean_expected_female_productivity=1.0,
        target_value=0.0,
        mean_tracking_gap=0.0,
        expected_mutations_current_N=0.0001,
        ne_approx=5.0,
        extinct=False,
        **shared,
    )
    # Generation 1: population size zero and flagged extinct.
    gone = GenerationSummary(
        t=1,
        N=0,
        female_fraction=0.0,
        male_count=0,
        female_count=0,
        fecundity=0.0,
        mean_fitness=0.0,
        mean_expected_female_productivity=0.0,
        target_value=0.1,
        mean_tracking_gap=-0.1,
        expected_mutations_current_N=0.0,
        ne_approx=0.0,
        extinct=True,
        **shared,
    )

    tracking = analysis.summarize_tracking([alive, gone])

    assert tracking.extinction_occurred is True
    assert tracking.first_extinction_t == 1

View File

@ -7,7 +7,7 @@ SRC_DIR = ROOT / "src"
if str(SRC_DIR) not in sys.path:
sys.path.insert(0, str(SRC_DIR))
from renunney import track1_api as api
import renunney.track1_api as api
def test_run_config_simulate_mode_returns_contract():