Migrate Track 1 dataset layer into renunney

This commit is contained in:
Codex 2026-04-11 06:50:53 -04:00
parent 7ea94aa7fd
commit aefd4e4ccb
7 changed files with 180 additions and 10 deletions

View File

@ -27,6 +27,7 @@ plane and the Track 1 runner/API boundary are now local to `renunney`.
- a local Track 1 simulation kernel,
- a local Track 1 report generator,
- a local Track 1 extinction-model data layer,
- a local Track 1 dataset generator,
- a Makefile for common tasks,
- migration notes for pulling code into this repo in stages.
@ -92,7 +93,8 @@ The current state is split:
- Track 1 simulation kernel: local to `renunney`
- Track 1 report generator: local to `renunney`
- Track 1 extinction-model data layer: local to `renunney`
- Track 1 dataset and fit helpers: still imported
- Track 1 dataset generator: local to `renunney`
- Track 1 fit helper: still imported
from the older `cost_of_substitution` directory through the local
compatibility layer

View File

@ -37,11 +37,12 @@ Operational code still lives in:
- `src/renunney/track1_report.py`
8. Track 1 extinction-model data layer has been migrated locally:
- `src/renunney/track1_extinction.py`
9. Migrate dataset and fit modules next:
- `python/track1_dataset.py`
9. Track 1 dataset generator has been migrated locally:
- `src/renunney/track1_dataset.py`
10. Migrate the fit module next:
- `python/track1_fit.py`
10. Reduce or remove the remaining compatibility-layer imports after those modules are local.
11. Migrate docs and example configs last, after path references are updated.
11. Reduce or remove the remaining compatibility-layer imports after that module is local.
12. Migrate docs and example configs last, after path references are updated.
## Constraint

View File

@ -49,7 +49,8 @@ make status
The Makefile now drives the local orchestration code in `renunney`, while the
Track 1 runner/API boundary, analysis layer, threshold/search layer, and
simulation kernel, report generator, and extinction-model data layer are also
local to `renunney`. The remaining Track 1 dataset/fit helpers are still
imported from the legacy `cost_of_substitution` directory through the
compatibility layer in `src/renunney/legacy.py`. The paper-scale Figure 1
configs used for submission are now local to `renunney/config`.
local to `renunney`, and the dataset generator is now local as well. The
remaining Track 1 fit helper is still imported from the legacy
`cost_of_substitution` directory through the compatibility layer in
`src/renunney/legacy.py`. The paper-scale Figure 1 configs used for submission
are now local to `renunney/config`.

View File

@ -28,6 +28,7 @@ from .track1_analysis import (
sweep_number_of_loci,
)
from .track1_api import Track1RunConfig, config_from_mapping, load_config, run_config, save_payload
from .track1_dataset import GRID_KEYS, generate_extinction_dataset
from .track1_extinction import (
ExtinctionGenerationRow,
ExtinctionRunRow,
@ -113,6 +114,7 @@ __all__ = [
"expected_female_productivity",
"expected_mutations_for_population",
"evaluate_threshold_candidate",
"generate_extinction_dataset",
"ExtinctionGenerationRow",
"ExtinctionRunRow",
"female_fecundity",
@ -121,6 +123,7 @@ __all__ = [
"generation_metrics",
"genotype_fitness",
"generate_report_bundle",
"GRID_KEYS",
"initialize_population",
"is_extinct",
"build_extinction_generation_rows",

View File

@ -16,13 +16,13 @@ from typing import Any, Optional
from .legacy import ensure_legacy_python_path
from .track1_analysis import summarize_tracking, sweep_number_of_loci
from .track1_dataset import generate_extinction_dataset
from .track1_reference import Track1Parameters, simulate_run
from .track1_report import generate_report_bundle
from .track1_threshold import evaluate_threshold_candidate, search_threshold_over_candidates
ensure_legacy_python_path()
from track1_dataset import generate_extinction_dataset
from track1_fit import class_balance, fit_payload_from_jsonl, load_jsonl

View File

@ -0,0 +1,107 @@
"""
track1_dataset.py
Dataset generation for extinction-risk analysis on top of Track 1 simulations.
"""
from __future__ import annotations
from dataclasses import asdict
import itertools
import json
from pathlib import Path
from typing import Any
from .track1_extinction import build_extinction_generation_rows, build_extinction_run_row, save_jsonl
from .track1_reference import Track1Parameters, simulate_run
GRID_KEYS = ("K", "N0", "n", "u", "R", "T", "epochs", "p", "a_max")


def _grid_axes_from_config(params: Track1Parameters, grid: dict[str, list[Any]] | None) -> dict[str, list[Any]]:
    """Build the per-parameter value axes for the treatment grid.

    Each key in ``GRID_KEYS`` starts as a single-value axis taken from
    *params*; entries in *grid* override the corresponding axis.

    Raises:
        ValueError: if *grid* contains an unknown key, or a value that is
            not a non-empty list.
    """
    # Default: every axis is the singleton [value-from-params].
    axes = {name: [getattr(params, name)] for name in GRID_KEYS}
    if grid:
        for key, override in grid.items():
            if key not in GRID_KEYS:
                raise ValueError(f"Unsupported extinction dataset grid key: {key}")
            # Only genuine non-empty lists are accepted as sweep axes.
            if not isinstance(override, list) or len(override) == 0:
                raise ValueError(f"Grid key {key} must map to a non-empty list.")
            axes[key] = override
    return axes
def generate_extinction_dataset(
    params: Track1Parameters,
    runs: int,
    seed_start: int,
    dataset_dir: str | Path,
    grid: dict[str, list[Any]] | None = None,
) -> dict[str, Any]:
    """Run the Track 1 simulation over a parameter grid and write a dataset.

    For every combination of grid axes (the cartesian product over
    ``GRID_KEYS``), this simulates *runs* replicates with sequential seeds
    starting at *seed_start*, then writes generation rows, run rows, a
    treatment table, and a metadata file into *dataset_dir*.

    Returns:
        The metadata mapping that is also persisted as ``metadata.json``.
    """
    out_path = Path(dataset_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    axes = _grid_axes_from_config(params, grid)

    gen_rows: list[Any] = []
    per_run_rows: list[Any] = []
    treatments: list[dict[str, Any]] = []

    # One treatment per point of the cartesian product over the grid axes.
    for idx, point in enumerate(itertools.product(*(axes[key] for key in GRID_KEYS))):
        combo = dict(zip(GRID_KEYS, point))
        treatment_params = Track1Parameters(
            K=int(combo["K"]),
            N0=int(combo["N0"]),
            n=int(combo["n"]),
            u=float(combo["u"]),
            R=float(combo["R"]),
            T=int(combo["T"]),
            epochs=int(combo["epochs"]),
            p=float(combo["p"]),
            a_max=None if combo["a_max"] is None else int(combo["a_max"]),
        )
        treatment_row: dict[str, Any] = {"treatment_index": idx}
        treatment_row.update(asdict(treatment_params))
        treatment_row["M"] = treatment_params.M
        treatment_row["runs"] = runs
        treatments.append(treatment_row)

        # Seeds are globally unique: each treatment owns a contiguous
        # block of `runs` seeds offset from seed_start.
        base_seed = seed_start + idx * runs
        for offset in range(runs):
            run_seed = base_seed + offset
            summaries = simulate_run(treatment_params, seed=run_seed)
            gen_rows.extend(build_extinction_generation_rows(treatment_params, summaries, seed=run_seed))
            per_run_rows.append(build_extinction_run_row(treatment_params, summaries, seed=run_seed))

    gen_path = out_path / "generation_rows.jsonl"
    run_path = out_path / "run_rows.jsonl"
    treatments_path = out_path / "treatments.json"
    save_jsonl(gen_rows, gen_path)
    save_jsonl(per_run_rows, run_path)
    treatments_path.write_text(
        json.dumps(treatments, indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )

    metadata: dict[str, Any] = {
        "dataset_dir": str(out_path),
        "generation_rows_path": str(gen_path),
        "run_rows_path": str(run_path),
        "treatments_path": str(treatments_path),
        "treatment_count": len(treatments),
        "run_row_count": len(per_run_rows),
        "generation_row_count": len(gen_rows),
        "runs_per_treatment": runs,
        "seed_start": seed_start,
        "grid": axes,
    }
    (out_path / "metadata.json").write_text(json.dumps(metadata, indent=2, sort_keys=True) + "\n", encoding="utf-8")
    return metadata

View File

@ -0,0 +1,56 @@
import json
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
SRC_DIR = ROOT / "src"
if str(SRC_DIR) not in sys.path:
sys.path.insert(0, str(SRC_DIR))
import renunney.track1_api as api
import renunney.track1_dataset as ds
import renunney.track1_reference as ref
def test_generate_extinction_dataset_writes_expected_files(tmp_path: Path):
    """A 2x2 grid with one run per treatment writes every artifact file."""
    out_dir = tmp_path / "dataset"
    base_params = ref.Track1Parameters(K=500, N0=20, n=1, u=0.001, R=10.0, T=10, epochs=1)
    result = ds.generate_extinction_dataset(
        params=base_params,
        runs=1,
        seed_start=1,
        dataset_dir=out_dir,
        grid={"N0": [20, 500], "u": [0.001, 0.005]},
    )
    # 2 N0 values x 2 u values -> 4 treatments, one run row each.
    assert result["treatment_count"] == 4
    assert result["run_row_count"] == 4
    for path_key in ("generation_rows_path", "run_rows_path", "treatments_path"):
        assert Path(result[path_key]).exists()
    persisted = json.loads((out_dir / "metadata.json").read_text(encoding="utf-8"))
    assert persisted["treatment_count"] == 4
def test_run_config_extinction_dataset_mode(tmp_path: Path):
    """The API-level runner dispatches extinction_dataset mode end to end."""
    out_dir = tmp_path / "dataset"
    cfg = api.Track1RunConfig(
        mode="extinction_dataset",
        K=500,
        N0=20,
        n=1,
        u=0.001,
        R=10.0,
        T=10,
        epochs=1,
        runs=1,
        seed=1,
        dataset_dir=str(out_dir),
        grid={"u": [0.001, 0.005]},
    )
    result = api.run_config(cfg)
    assert result["mode"] == "extinction_dataset"
    assert result["parameters"]["u"] == 0.001
    assert result["parameters"]["M"] == 1.0
    # One grid axis with two values -> two treatments.
    assert result["treatment_count"] == 2
    assert (out_dir / "run_rows.jsonl").exists()
    assert (out_dir / "generation_rows.jsonl").exists()