Migrate Track 1 dataset layer into renunney

Codex 2026-04-11 06:50:53 -04:00
parent 7ea94aa7fd
commit aefd4e4ccb
7 changed files with 180 additions and 10 deletions

View File

@@ -27,6 +27,7 @@ plane and the Track 1 runner/API boundary are now local to `renunney`.
 - a local Track 1 simulation kernel,
 - a local Track 1 report generator,
 - a local Track 1 extinction-model data layer,
+- a local Track 1 dataset generator,
 - a Makefile for common tasks,
 - migration notes for pulling code into this repo in stages.
@@ -92,7 +93,8 @@ The current state is split:
 - Track 1 simulation kernel: local to `renunney`
 - Track 1 report generator: local to `renunney`
 - Track 1 extinction-model data layer: local to `renunney`
-- Track 1 dataset and fit helpers: still imported
+- Track 1 dataset generator: local to `renunney`
+- Track 1 fit helper: still imported
   from the older `cost_of_substitution` directory through the local
   compatibility layer

View File

@@ -37,11 +37,12 @@ Operational code still lives in:
    - `src/renunney/track1_report.py`
 8. Track 1 extinction-model data layer has been migrated locally:
    - `src/renunney/track1_extinction.py`
-9. Migrate dataset and fit modules next:
-   - `python/track1_dataset.py`
+9. Track 1 dataset generator has been migrated locally:
+   - `src/renunney/track1_dataset.py`
+10. Migrate the fit module next:
    - `python/track1_fit.py`
-10. Reduce or remove the remaining compatibility-layer imports after those modules are local.
-11. Migrate docs and example configs last, after path references are updated.
+11. Reduce or remove the remaining compatibility-layer imports after those modules are local.
+12. Migrate docs and example configs last, after path references are updated.
 
 ## Constraint

View File

@@ -49,7 +49,8 @@ make status
 The Makefile now drives the local orchestration code in `renunney`, while the
 Track 1 runner/API boundary, analysis layer, threshold/search layer, and
 simulation kernel, report generator, and extinction-model data layer are also
-local to `renunney`. The remaining Track 1 dataset/fit helpers are still
-imported from the legacy `cost_of_substitution` directory through the
-compatibility layer in `src/renunney/legacy.py`. The paper-scale Figure 1
-configs used for submission are now local to `renunney/config`.
+local to `renunney`, and the dataset generator is now local as well. The
+remaining Track 1 fit helper is still imported from the legacy
+`cost_of_substitution` directory through the compatibility layer in
+`src/renunney/legacy.py`. The paper-scale Figure 1 configs used for submission
+are now local to `renunney/config`.
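
For orientation, the compatibility layer itself is not touched by this commit. A minimal sketch of what `src/renunney/legacy.py` presumably does, based on how `ensure_legacy_python_path()` is used in the run-config module below (the real module may differ):

# Hypothetical sketch of src/renunney/legacy.py; the actual implementation is not part of this diff.
import sys
from pathlib import Path

# Assumed location of the legacy checkout; the real relative path may differ.
_LEGACY_PYTHON_DIR = Path(__file__).resolve().parents[2] / "cost_of_substitution" / "python"


def ensure_legacy_python_path() -> None:
    """Make the legacy cost_of_substitution/python modules importable."""
    legacy = str(_LEGACY_PYTHON_DIR)
    if legacy not in sys.path:
        sys.path.insert(0, legacy)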

View File

@@ -28,6 +28,7 @@ from .track1_analysis import (
     sweep_number_of_loci,
 )
 from .track1_api import Track1RunConfig, config_from_mapping, load_config, run_config, save_payload
+from .track1_dataset import GRID_KEYS, generate_extinction_dataset
 from .track1_extinction import (
     ExtinctionGenerationRow,
     ExtinctionRunRow,
@@ -113,6 +114,7 @@ __all__ = [
     "expected_female_productivity",
     "expected_mutations_for_population",
     "evaluate_threshold_candidate",
+    "generate_extinction_dataset",
     "ExtinctionGenerationRow",
     "ExtinctionRunRow",
     "female_fecundity",
@@ -121,6 +123,7 @@ __all__ = [
     "generation_metrics",
     "genotype_fitness",
     "generate_report_bundle",
+    "GRID_KEYS",
     "initialize_population",
     "is_extinct",
     "build_extinction_generation_rows",

View File

@@ -16,13 +16,13 @@ from typing import Any, Optional
 from .legacy import ensure_legacy_python_path
 from .track1_analysis import summarize_tracking, sweep_number_of_loci
+from .track1_dataset import generate_extinction_dataset
 from .track1_reference import Track1Parameters, simulate_run
 from .track1_report import generate_report_bundle
 from .track1_threshold import evaluate_threshold_candidate, search_threshold_over_candidates
 
 ensure_legacy_python_path()
-from track1_dataset import generate_extinction_dataset
 from track1_fit import class_balance, fit_payload_from_jsonl, load_jsonl
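
The `extinction_dataset` dispatch inside `run_config` is outside this hunk. A rough sketch of how that mode presumably routes `Track1RunConfig` fields into the now-local generator; the field names are taken from the tests below, and the actual wiring may differ:

# Hypothetical sketch only; the real run_config dispatch is not shown in this diff.
from renunney.track1_dataset import generate_extinction_dataset
from renunney.track1_reference import Track1Parameters


def _run_extinction_dataset(config) -> dict:
    params = Track1Parameters(
        K=config.K, N0=config.N0, n=config.n, u=config.u,
        R=config.R, T=config.T, epochs=config.epochs,
    )
    return generate_extinction_dataset(
        params=params,
        runs=config.runs,
        seed_start=config.seed,
        dataset_dir=config.dataset_dir,
        grid=config.grid,
    )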

View File

@@ -0,0 +1,107 @@
"""
track1_dataset.py

Dataset generation for extinction-risk analysis on top of Track 1 simulations.
"""

from __future__ import annotations

from dataclasses import asdict
import itertools
import json
from pathlib import Path
from typing import Any

from .track1_extinction import build_extinction_generation_rows, build_extinction_run_row, save_jsonl
from .track1_reference import Track1Parameters, simulate_run

GRID_KEYS = ("K", "N0", "n", "u", "R", "T", "epochs", "p", "a_max")

def _grid_axes_from_config(params: Track1Parameters, grid: dict[str, list[Any]] | None) -> dict[str, list[Any]]:
    # Start from a degenerate grid: one value per axis, taken from the base parameters.
    base = {
        "K": [params.K],
        "N0": [params.N0],
        "n": [params.n],
        "u": [params.u],
        "R": [params.R],
        "T": [params.T],
        "epochs": [params.epochs],
        "p": [params.p],
        "a_max": [params.a_max],
    }
    if not grid:
        return base
    # Requested axes override the defaults; unknown keys and empty lists are rejected.
    for key, values in grid.items():
        if key not in GRID_KEYS:
            raise ValueError(f"Unsupported extinction dataset grid key: {key}")
        if not isinstance(values, list) or len(values) == 0:
            raise ValueError(f"Grid key {key} must map to a non-empty list.")
        base[key] = values
    return base


def generate_extinction_dataset(
    params: Track1Parameters,
    runs: int,
    seed_start: int,
    dataset_dir: str | Path,
    grid: dict[str, list[Any]] | None = None,
) -> dict[str, Any]:
    outdir = Path(dataset_dir)
    outdir.mkdir(parents=True, exist_ok=True)
    # The Cartesian product over all grid axes (in GRID_KEYS order) defines the treatments.
    axes = _grid_axes_from_config(params, grid)
    combinations = list(itertools.product(*(axes[key] for key in GRID_KEYS)))
    generation_rows = []
    run_rows = []
    treatment_rows = []
    for treatment_index, values in enumerate(combinations):
        combo = dict(zip(GRID_KEYS, values))
        run_params = Track1Parameters(
            K=int(combo["K"]),
            N0=int(combo["N0"]),
            n=int(combo["n"]),
            u=float(combo["u"]),
            R=float(combo["R"]),
            T=int(combo["T"]),
            epochs=int(combo["epochs"]),
            p=float(combo["p"]),
            a_max=None if combo["a_max"] is None else int(combo["a_max"]),
        )
        treatment_rows.append(
            {
                "treatment_index": treatment_index,
                **asdict(run_params),
                "M": run_params.M,
                "runs": runs,
            }
        )
        for run_offset in range(runs):
            # Each treatment gets a contiguous block of `runs` deterministic seeds.
            seed = seed_start + (treatment_index * runs) + run_offset
            summaries = simulate_run(run_params, seed=seed)
            generation_rows.extend(build_extinction_generation_rows(run_params, summaries, seed=seed))
            run_rows.append(build_extinction_run_row(run_params, summaries, seed=seed))
    save_jsonl(generation_rows, outdir / "generation_rows.jsonl")
    save_jsonl(run_rows, outdir / "run_rows.jsonl")
    (outdir / "treatments.json").write_text(
        json.dumps(treatment_rows, indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )
    metadata = {
        "dataset_dir": str(outdir),
        "generation_rows_path": str(outdir / "generation_rows.jsonl"),
        "run_rows_path": str(outdir / "run_rows.jsonl"),
        "treatments_path": str(outdir / "treatments.json"),
        "treatment_count": len(treatment_rows),
        "run_row_count": len(run_rows),
        "generation_row_count": len(generation_rows),
        "runs_per_treatment": runs,
        "seed_start": seed_start,
        "grid": axes,
    }
    (outdir / "metadata.json").write_text(json.dumps(metadata, indent=2, sort_keys=True) + "\n", encoding="utf-8")
    return metadata
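
A minimal usage sketch of the new module (illustrative values and output path; assumes the package is importable):

from renunney.track1_dataset import generate_extinction_dataset
from renunney.track1_reference import Track1Parameters

# Two grid axes with two values each expand to 2 * 2 = 4 treatments; with runs=3
# the seeds are seed_start + treatment_index * runs + run_offset, i.e. 100..111.
params = Track1Parameters(K=500, N0=20, n=1, u=0.001, R=10.0, T=10, epochs=1)
metadata = generate_extinction_dataset(
    params=params,
    runs=3,
    seed_start=100,
    dataset_dir="out/extinction_dataset",  # illustrative output directory
    grid={"N0": [20, 500], "u": [0.001, 0.005]},
)
print(metadata["treatment_count"])  # 4
print(metadata["run_row_count"])    # 12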

View File

@@ -0,0 +1,56 @@
import json
import sys
from pathlib import Path

# Make the local src/ layout importable without installing the package.
ROOT = Path(__file__).resolve().parents[1]
SRC_DIR = ROOT / "src"
if str(SRC_DIR) not in sys.path:
    sys.path.insert(0, str(SRC_DIR))

import renunney.track1_api as api
import renunney.track1_dataset as ds
import renunney.track1_reference as ref


def test_generate_extinction_dataset_writes_expected_files(tmp_path: Path):
    params = ref.Track1Parameters(K=500, N0=20, n=1, u=0.001, R=10.0, T=10, epochs=1)
    dataset_dir = tmp_path / "dataset"
    payload = ds.generate_extinction_dataset(
        params=params,
        runs=1,
        seed_start=1,
        dataset_dir=dataset_dir,
        grid={"N0": [20, 500], "u": [0.001, 0.005]},
    )
    # Two axes with two values each expand to 2 * 2 = 4 treatments, one run apiece.
    assert payload["treatment_count"] == 4
    assert payload["run_row_count"] == 4
    assert Path(payload["generation_rows_path"]).exists()
    assert Path(payload["run_rows_path"]).exists()
    assert Path(payload["treatments_path"]).exists()
    metadata = json.loads((dataset_dir / "metadata.json").read_text(encoding="utf-8"))
    assert metadata["treatment_count"] == 4


def test_run_config_extinction_dataset_mode(tmp_path: Path):
    dataset_dir = tmp_path / "dataset"
    config = api.Track1RunConfig(
        mode="extinction_dataset",
        K=500,
        N0=20,
        n=1,
        u=0.001,
        R=10.0,
        T=10,
        epochs=1,
        runs=1,
        seed=1,
        dataset_dir=str(dataset_dir),
        grid={"u": [0.001, 0.005]},
    )
    payload = api.run_config(config)
    assert payload["mode"] == "extinction_dataset"
    assert payload["parameters"]["u"] == 0.001
    assert payload["parameters"]["M"] == 1.0
    assert payload["treatment_count"] == 2
    assert (dataset_dir / "run_rows.jsonl").exists()
    assert (dataset_dir / "generation_rows.jsonl").exists()