Migrate Track 1 dataset layer into renunney
This commit is contained in:
parent
7ea94aa7fd
commit
aefd4e4ccb
|
|
@ -27,6 +27,7 @@ plane and the Track 1 runner/API boundary are now local to `renunney`.
|
|||
- a local Track 1 simulation kernel,
|
||||
- a local Track 1 report generator,
|
||||
- a local Track 1 extinction-model data layer,
|
||||
- a local Track 1 dataset generator,
|
||||
- a Makefile for common tasks,
|
||||
- migration notes for pulling code into this repo in stages.
|
||||
|
||||
|
|
@ -92,7 +93,8 @@ The current state is split:
|
|||
- Track 1 simulation kernel: local to `renunney`
|
||||
- Track 1 report generator: local to `renunney`
|
||||
- Track 1 extinction-model data layer: local to `renunney`
|
||||
- Track 1 dataset and fit helpers: still imported
|
||||
- Track 1 dataset generator: local to `renunney`
|
||||
- Track 1 fit helper: still imported
|
||||
from the older `cost_of_substitution` directory through the local
|
||||
compatibility layer
|
||||
|
||||
|
|
|
|||
|
|
@ -37,11 +37,12 @@ Operational code still lives in:
|
|||
- `src/renunney/track1_report.py`
|
||||
8. Track 1 extinction-model data layer has been migrated locally:
|
||||
- `src/renunney/track1_extinction.py`
|
||||
9. Migrate dataset and fit modules next:
|
||||
- `python/track1_dataset.py`
|
||||
9. Track 1 dataset generator has been migrated locally:
|
||||
- `src/renunney/track1_dataset.py`
|
||||
10. Migrate the fit module next:
|
||||
- `python/track1_fit.py`
|
||||
10. Reduce or remove the remaining compatibility-layer imports after those modules are local.
|
||||
11. Migrate docs and example configs last, after path references are updated.
|
||||
11. Reduce or remove the remaining compatibility-layer imports after those modules are local.
|
||||
12. Migrate docs and example configs last, after path references are updated.
|
||||
|
||||
## Constraint
|
||||
|
||||
|
|
|
|||
|
|
@ -49,7 +49,8 @@ make status
|
|||
The Makefile now drives the local orchestration code in `renunney`, while the
|
||||
Track 1 runner/API boundary, analysis layer, threshold/search layer,
|
||||
simulation kernel, report generator, and extinction-model data layer are also
|
||||
local to `renunney`. The remaining Track 1 dataset/fit helpers are still
|
||||
imported from the legacy `cost_of_substitution` directory through the
|
||||
compatibility layer in `src/renunney/legacy.py`. The paper-scale Figure 1
|
||||
configs used for submission are now local to `renunney/config`.
|
||||
local to `renunney`, and the dataset generator is now local as well. The
|
||||
remaining Track 1 fit helper is still imported from the legacy
|
||||
`cost_of_substitution` directory through the compatibility layer in
|
||||
`src/renunney/legacy.py`. The paper-scale Figure 1 configs used for submission
|
||||
are now local to `renunney/config`.
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ from .track1_analysis import (
|
|||
sweep_number_of_loci,
|
||||
)
|
||||
from .track1_api import Track1RunConfig, config_from_mapping, load_config, run_config, save_payload
|
||||
from .track1_dataset import GRID_KEYS, generate_extinction_dataset
|
||||
from .track1_extinction import (
|
||||
ExtinctionGenerationRow,
|
||||
ExtinctionRunRow,
|
||||
|
|
@ -113,6 +114,7 @@ __all__ = [
|
|||
"expected_female_productivity",
|
||||
"expected_mutations_for_population",
|
||||
"evaluate_threshold_candidate",
|
||||
"generate_extinction_dataset",
|
||||
"ExtinctionGenerationRow",
|
||||
"ExtinctionRunRow",
|
||||
"female_fecundity",
|
||||
|
|
@ -121,6 +123,7 @@ __all__ = [
|
|||
"generation_metrics",
|
||||
"genotype_fitness",
|
||||
"generate_report_bundle",
|
||||
"GRID_KEYS",
|
||||
"initialize_population",
|
||||
"is_extinct",
|
||||
"build_extinction_generation_rows",
|
||||
|
|
|
|||
|
|
@ -16,13 +16,13 @@ from typing import Any, Optional
|
|||
|
||||
from .legacy import ensure_legacy_python_path
|
||||
from .track1_analysis import summarize_tracking, sweep_number_of_loci
|
||||
from .track1_dataset import generate_extinction_dataset
|
||||
from .track1_reference import Track1Parameters, simulate_run
|
||||
from .track1_report import generate_report_bundle
|
||||
from .track1_threshold import evaluate_threshold_candidate, search_threshold_over_candidates
|
||||
|
||||
ensure_legacy_python_path()
|
||||
|
||||
from track1_dataset import generate_extinction_dataset
|
||||
from track1_fit import class_balance, fit_payload_from_jsonl, load_jsonl
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,107 @@
|
|||
"""
|
||||
track1_dataset.py
|
||||
|
||||
Dataset generation for extinction-risk analysis on top of Track 1 simulations.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import asdict
|
||||
import itertools
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .track1_extinction import build_extinction_generation_rows, build_extinction_run_row, save_jsonl
|
||||
from .track1_reference import Track1Parameters, simulate_run
|
||||
|
||||
|
||||
# Parameter names that may be swept when building an extinction dataset grid.
GRID_KEYS = ("K", "N0", "n", "u", "R", "T", "epochs", "p", "a_max")


def _grid_axes_from_config(params: Track1Parameters, grid: dict[str, list[Any]] | None) -> dict[str, list[Any]]:
    """Return one axis (a list of candidate values) per grid key.

    Each key in ``GRID_KEYS`` starts as a singleton axis holding the
    corresponding attribute of ``params``; entries in ``grid`` then override
    those axes. Unknown keys and empty or non-list values are rejected.
    """
    # Every grid key mirrors an attribute of Track1Parameters with the same name.
    axes = {key: [getattr(params, key)] for key in GRID_KEYS}
    if not grid:
        return axes
    for key, values in grid.items():
        if key not in GRID_KEYS:
            raise ValueError(f"Unsupported extinction dataset grid key: {key}")
        if not isinstance(values, list) or len(values) == 0:
            raise ValueError(f"Grid key {key} must map to a non-empty list.")
        axes[key] = values
    return axes
|
||||
|
||||
|
||||
def _run_params_from_combo(combo: dict[str, Any]) -> Track1Parameters:
    """Coerce one grid combination (keyed by GRID_KEYS) into typed parameters."""
    return Track1Parameters(
        K=int(combo["K"]),
        N0=int(combo["N0"]),
        n=int(combo["n"]),
        u=float(combo["u"]),
        R=float(combo["R"]),
        T=int(combo["T"]),
        epochs=int(combo["epochs"]),
        p=float(combo["p"]),
        # a_max is optional; preserve None rather than coercing it.
        a_max=None if combo["a_max"] is None else int(combo["a_max"]),
    )


def generate_extinction_dataset(
    params: Track1Parameters,
    runs: int,
    seed_start: int,
    dataset_dir: str | Path,
    grid: dict[str, list[Any]] | None = None,
) -> dict[str, Any]:
    """Simulate every grid treatment and write the extinction dataset to disk.

    Builds the Cartesian product of the grid axes (defaults taken from
    ``params``, overridden by ``grid``), runs ``runs`` simulations per
    treatment with deterministic seeds derived from ``seed_start``, and writes
    ``generation_rows.jsonl``, ``run_rows.jsonl``, ``treatments.json``, and
    ``metadata.json`` into ``dataset_dir``.

    Args:
        params: Baseline simulation parameters; any key not swept by ``grid``
            keeps its value from here.
        runs: Number of simulation runs per treatment.
        seed_start: Base RNG seed; run ``j`` of treatment ``i`` uses
            ``seed_start + i * runs + j`` so datasets are reproducible.
        dataset_dir: Output directory; created if missing.
        grid: Optional mapping of GRID_KEYS entries to lists of values to sweep.

    Returns:
        The metadata mapping that is also written to ``metadata.json``.

    Raises:
        ValueError: If ``grid`` contains an unsupported key or an empty/non-list
            value (via ``_grid_axes_from_config``).
    """
    outdir = Path(dataset_dir)
    outdir.mkdir(parents=True, exist_ok=True)

    axes = _grid_axes_from_config(params, grid)
    # Iterate axes in GRID_KEYS order so treatment indices are deterministic.
    combinations = list(itertools.product(*(axes[key] for key in GRID_KEYS)))
    generation_rows: list[Any] = []
    run_rows: list[Any] = []
    treatment_rows: list[dict[str, Any]] = []

    for treatment_index, values in enumerate(combinations):
        run_params = _run_params_from_combo(dict(zip(GRID_KEYS, values)))
        treatment_rows.append(
            {
                "treatment_index": treatment_index,
                **asdict(run_params),
                # M is a derived property, not a dataclass field, so asdict
                # does not include it.
                "M": run_params.M,
                "runs": runs,
            }
        )
        for run_offset in range(runs):
            # Deterministic, non-overlapping seed per (treatment, run) pair.
            seed = seed_start + (treatment_index * runs) + run_offset
            summaries = simulate_run(run_params, seed=seed)
            generation_rows.extend(build_extinction_generation_rows(run_params, summaries, seed=seed))
            run_rows.append(build_extinction_run_row(run_params, summaries, seed=seed))

    save_jsonl(generation_rows, outdir / "generation_rows.jsonl")
    save_jsonl(run_rows, outdir / "run_rows.jsonl")
    (outdir / "treatments.json").write_text(
        json.dumps(treatment_rows, indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )
    metadata = {
        "dataset_dir": str(outdir),
        "generation_rows_path": str(outdir / "generation_rows.jsonl"),
        "run_rows_path": str(outdir / "run_rows.jsonl"),
        "treatments_path": str(outdir / "treatments.json"),
        "treatment_count": len(treatment_rows),
        "run_row_count": len(run_rows),
        "generation_row_count": len(generation_rows),
        "runs_per_treatment": runs,
        "seed_start": seed_start,
        "grid": axes,
    }
    (outdir / "metadata.json").write_text(json.dumps(metadata, indent=2, sort_keys=True) + "\n", encoding="utf-8")
    return metadata
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
SRC_DIR = ROOT / "src"
|
||||
if str(SRC_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(SRC_DIR))
|
||||
|
||||
import renunney.track1_api as api
|
||||
import renunney.track1_dataset as ds
|
||||
import renunney.track1_reference as ref
|
||||
|
||||
|
||||
def test_generate_extinction_dataset_writes_expected_files(tmp_path: Path):
    """The dataset generator emits both row files, treatments, and metadata."""
    base_params = ref.Track1Parameters(K=500, N0=20, n=1, u=0.001, R=10.0, T=10, epochs=1)
    out_dir = tmp_path / "dataset"
    result = ds.generate_extinction_dataset(
        params=base_params,
        runs=1,
        seed_start=1,
        dataset_dir=out_dir,
        grid={"N0": [20, 500], "u": [0.001, 0.005]},
    )
    # 2 N0 values x 2 u values -> 4 treatments, one run each.
    assert result["treatment_count"] == 4
    assert result["run_row_count"] == 4
    for path_key in ("generation_rows_path", "run_rows_path", "treatments_path"):
        assert Path(result[path_key]).exists()
    metadata = json.loads((out_dir / "metadata.json").read_text(encoding="utf-8"))
    assert metadata["treatment_count"] == 4
||||
|
||||
|
||||
def test_run_config_extinction_dataset_mode(tmp_path: Path):
    """run_config in extinction_dataset mode builds a dataset on disk."""
    out_dir = tmp_path / "dataset"
    cfg = api.Track1RunConfig(
        mode="extinction_dataset",
        K=500,
        N0=20,
        n=1,
        u=0.001,
        R=10.0,
        T=10,
        epochs=1,
        runs=1,
        seed=1,
        dataset_dir=str(out_dir),
        grid={"u": [0.001, 0.005]},
    )
    result = api.run_config(cfg)
    assert result["mode"] == "extinction_dataset"
    assert result["parameters"]["u"] == 0.001
    assert result["parameters"]["M"] == 1.0
    # Two u values in the grid -> two treatments.
    assert result["treatment_count"] == 2
    for filename in ("run_rows.jsonl", "generation_rows.jsonl"):
        assert (out_dir / filename).exists()
|
||||
Loading…
Reference in New Issue