Migrate Track 1 dataset layer into renunney
This commit is contained in:
parent
7ea94aa7fd
commit
aefd4e4ccb
|
|
@ -27,6 +27,7 @@ plane and the Track 1 runner/API boundary are now local to `renunney`.
|
||||||
- a local Track 1 simulation kernel,
|
- a local Track 1 simulation kernel,
|
||||||
- a local Track 1 report generator,
|
- a local Track 1 report generator,
|
||||||
- a local Track 1 extinction-model data layer,
|
- a local Track 1 extinction-model data layer,
|
||||||
|
- a local Track 1 dataset generator,
|
||||||
- a Makefile for common tasks,
|
- a Makefile for common tasks,
|
||||||
- migration notes for pulling code into this repo in stages.
|
- migration notes for pulling code into this repo in stages.
|
||||||
|
|
||||||
|
|
@ -92,7 +93,8 @@ The current state is split:
|
||||||
- Track 1 simulation kernel: local to `renunney`
|
- Track 1 simulation kernel: local to `renunney`
|
||||||
- Track 1 report generator: local to `renunney`
|
- Track 1 report generator: local to `renunney`
|
||||||
- Track 1 extinction-model data layer: local to `renunney`
|
- Track 1 extinction-model data layer: local to `renunney`
|
||||||
- Track 1 dataset and fit helpers: still imported
|
- Track 1 dataset generator: local to `renunney`
|
||||||
|
- Track 1 fit helper: still imported
|
||||||
from the older `cost_of_substitution` directory through the local
|
from the older `cost_of_substitution` directory through the local
|
||||||
compatibility layer
|
compatibility layer
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -37,11 +37,12 @@ Operational code still lives in:
|
||||||
- `src/renunney/track1_report.py`
|
- `src/renunney/track1_report.py`
|
||||||
8. Track 1 extinction-model data layer has been migrated locally:
|
8. Track 1 extinction-model data layer has been migrated locally:
|
||||||
- `src/renunney/track1_extinction.py`
|
- `src/renunney/track1_extinction.py`
|
||||||
9. Migrate dataset and fit modules next:
|
9. Track 1 dataset generator has been migrated locally:
|
||||||
- `python/track1_dataset.py`
|
- `src/renunney/track1_dataset.py`
|
||||||
|
10. Migrate the fit module next:
|
||||||
- `python/track1_fit.py`
|
- `python/track1_fit.py`
|
||||||
10. Reduce or remove the remaining compatibility-layer imports after those modules are local.
|
11. Reduce or remove the remaining compatibility-layer imports after those modules are local.
|
||||||
11. Migrate docs and example configs last, after path references are updated.
|
12. Migrate docs and example configs last, after path references are updated.
|
||||||
|
|
||||||
## Constraint
|
## Constraint
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -49,7 +49,8 @@ make status
|
||||||
The Makefile now drives the local orchestration code in `renunney`, while the
|
The Makefile now drives the local orchestration code in `renunney`, while the
|
||||||
Track 1 runner/API boundary, analysis layer, threshold/search layer,
|
Track 1 runner/API boundary, analysis layer, threshold/search layer,
|
||||||
simulation kernel, report generator, and extinction-model data layer are also
|
simulation kernel, report generator, and extinction-model data layer are also
|
|
||||||
local to `renunney`. The remaining Track 1 dataset/fit helpers are still
|
local to `renunney`, and the dataset generator is now local as well. The
|
||||||
imported from the legacy `cost_of_substitution` directory through the
|
remaining Track 1 fit helper is still imported from the legacy
|
||||||
compatibility layer in `src/renunney/legacy.py`. The paper-scale Figure 1
|
`cost_of_substitution` directory through the compatibility layer in
|
||||||
configs used for submission are now local to `renunney/config`.
|
`src/renunney/legacy.py`. The paper-scale Figure 1 configs used for submission
|
||||||
|
are now local to `renunney/config`.
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ from .track1_analysis import (
|
||||||
sweep_number_of_loci,
|
sweep_number_of_loci,
|
||||||
)
|
)
|
||||||
from .track1_api import Track1RunConfig, config_from_mapping, load_config, run_config, save_payload
|
from .track1_api import Track1RunConfig, config_from_mapping, load_config, run_config, save_payload
|
||||||
|
from .track1_dataset import GRID_KEYS, generate_extinction_dataset
|
||||||
from .track1_extinction import (
|
from .track1_extinction import (
|
||||||
ExtinctionGenerationRow,
|
ExtinctionGenerationRow,
|
||||||
ExtinctionRunRow,
|
ExtinctionRunRow,
|
||||||
|
|
@ -113,6 +114,7 @@ __all__ = [
|
||||||
"expected_female_productivity",
|
"expected_female_productivity",
|
||||||
"expected_mutations_for_population",
|
"expected_mutations_for_population",
|
||||||
"evaluate_threshold_candidate",
|
"evaluate_threshold_candidate",
|
||||||
|
"generate_extinction_dataset",
|
||||||
"ExtinctionGenerationRow",
|
"ExtinctionGenerationRow",
|
||||||
"ExtinctionRunRow",
|
"ExtinctionRunRow",
|
||||||
"female_fecundity",
|
"female_fecundity",
|
||||||
|
|
@ -121,6 +123,7 @@ __all__ = [
|
||||||
"generation_metrics",
|
"generation_metrics",
|
||||||
"genotype_fitness",
|
"genotype_fitness",
|
||||||
"generate_report_bundle",
|
"generate_report_bundle",
|
||||||
|
"GRID_KEYS",
|
||||||
"initialize_population",
|
"initialize_population",
|
||||||
"is_extinct",
|
"is_extinct",
|
||||||
"build_extinction_generation_rows",
|
"build_extinction_generation_rows",
|
||||||
|
|
|
||||||
|
|
@ -16,13 +16,13 @@ from typing import Any, Optional
|
||||||
|
|
||||||
from .legacy import ensure_legacy_python_path
|
from .legacy import ensure_legacy_python_path
|
||||||
from .track1_analysis import summarize_tracking, sweep_number_of_loci
|
from .track1_analysis import summarize_tracking, sweep_number_of_loci
|
||||||
|
from .track1_dataset import generate_extinction_dataset
|
||||||
from .track1_reference import Track1Parameters, simulate_run
|
from .track1_reference import Track1Parameters, simulate_run
|
||||||
from .track1_report import generate_report_bundle
|
from .track1_report import generate_report_bundle
|
||||||
from .track1_threshold import evaluate_threshold_candidate, search_threshold_over_candidates
|
from .track1_threshold import evaluate_threshold_candidate, search_threshold_over_candidates
|
||||||
|
|
||||||
ensure_legacy_python_path()
|
ensure_legacy_python_path()
|
||||||
|
|
||||||
from track1_dataset import generate_extinction_dataset
|
|
||||||
from track1_fit import class_balance, fit_payload_from_jsonl, load_jsonl
|
from track1_fit import class_balance, fit_payload_from_jsonl, load_jsonl
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,107 @@
|
||||||
|
"""
|
||||||
|
track1_dataset.py
|
||||||
|
|
||||||
|
Dataset generation for extinction-risk analysis on top of Track 1 simulations.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import asdict
|
||||||
|
import itertools
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from .track1_extinction import build_extinction_generation_rows, build_extinction_run_row, save_jsonl
|
||||||
|
from .track1_reference import Track1Parameters, simulate_run
|
||||||
|
|
||||||
|
|
||||||
|
# Parameter names that may be swept over when building an extinction dataset.
GRID_KEYS = ("K", "N0", "n", "u", "R", "T", "epochs", "p", "a_max")


def _grid_axes_from_config(params: Track1Parameters, grid: dict[str, list[Any]] | None) -> dict[str, list[Any]]:
    """Build one axis of candidate values for every key in ``GRID_KEYS``.

    Each axis defaults to the single value carried by *params*; entries in
    *grid* (when provided) replace the corresponding default axis wholesale.

    Raises:
        ValueError: if *grid* contains a key outside ``GRID_KEYS``, or maps a
            key to anything other than a non-empty list.
    """
    # Every grid key names an attribute on the parameter object, so the
    # defaults can be derived in one pass rather than spelled out per field.
    axes = {key: [getattr(params, key)] for key in GRID_KEYS}
    if not grid:
        return axes
    for key, values in grid.items():
        if key not in GRID_KEYS:
            raise ValueError(f"Unsupported extinction dataset grid key: {key}")
        if not isinstance(values, list) or len(values) == 0:
            raise ValueError(f"Grid key {key} must map to a non-empty list.")
        axes[key] = values
    return axes
||||||
|
|
||||||
|
|
||||||
|
def generate_extinction_dataset(
    params: Track1Parameters,
    runs: int,
    seed_start: int,
    dataset_dir: str | Path,
    grid: dict[str, list[Any]] | None = None,
) -> dict[str, Any]:
    """Simulate every treatment in the parameter grid and persist the rows.

    Writes ``generation_rows.jsonl``, ``run_rows.jsonl``, ``treatments.json``
    and ``metadata.json`` under *dataset_dir* (created if missing) and returns
    the metadata mapping that was written.

    Args:
        params: Base simulation parameters; supplies the default value for
            every axis not overridden by *grid*.
        runs: Number of independent simulation runs per treatment.
        seed_start: First seed; each treatment gets a contiguous seed block.
        dataset_dir: Output directory for the dataset files.
        grid: Optional per-key value lists to sweep (see ``GRID_KEYS``).
    """
    target = Path(dataset_dir)
    target.mkdir(parents=True, exist_ok=True)

    axes = _grid_axes_from_config(params, grid)
    # Take the Cartesian product in fixed GRID_KEYS order so treatment
    # indices — and therefore the derived seeds — are reproducible.
    treatments = list(itertools.product(*(axes[key] for key in GRID_KEYS)))

    generation_rows: list[Any] = []
    run_rows: list[Any] = []
    treatment_rows: list[dict[str, Any]] = []

    for index, point in enumerate(treatments):
        combo = dict(zip(GRID_KEYS, point))
        run_params = Track1Parameters(
            K=int(combo["K"]),
            N0=int(combo["N0"]),
            n=int(combo["n"]),
            u=float(combo["u"]),
            R=float(combo["R"]),
            T=int(combo["T"]),
            epochs=int(combo["epochs"]),
            p=float(combo["p"]),
            a_max=None if combo["a_max"] is None else int(combo["a_max"]),
        )
        treatment_rows.append(
            {
                "treatment_index": index,
                **asdict(run_params),
                "M": run_params.M,
                "runs": runs,
            }
        )
        # Seeds for this treatment occupy the contiguous half-open range
        # [seed_start + index*runs, seed_start + (index+1)*runs).
        first_seed = seed_start + (index * runs)
        for offset in range(runs):
            seed = first_seed + offset
            summaries = simulate_run(run_params, seed=seed)
            generation_rows.extend(build_extinction_generation_rows(run_params, summaries, seed=seed))
            run_rows.append(build_extinction_run_row(run_params, summaries, seed=seed))

    save_jsonl(generation_rows, target / "generation_rows.jsonl")
    save_jsonl(run_rows, target / "run_rows.jsonl")
    treatments_path = target / "treatments.json"
    treatments_path.write_text(
        json.dumps(treatment_rows, indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )

    metadata: dict[str, Any] = {
        "dataset_dir": str(target),
        "generation_rows_path": str(target / "generation_rows.jsonl"),
        "run_rows_path": str(target / "run_rows.jsonl"),
        "treatments_path": str(treatments_path),
        "treatment_count": len(treatment_rows),
        "run_row_count": len(run_rows),
        "generation_row_count": len(generation_rows),
        "runs_per_treatment": runs,
        "seed_start": seed_start,
        "grid": axes,
    }
    (target / "metadata.json").write_text(json.dumps(metadata, indent=2, sort_keys=True) + "\n", encoding="utf-8")
    return metadata
||||||
|
|
@ -0,0 +1,56 @@
|
||||||
|
import json
import sys
from pathlib import Path

# Make the in-repo package importable without installation: presumably this
# test file lives one directory below the repo root, so parents[1] is the
# repo root and the package sources live under <repo>/src — TODO confirm.
ROOT = Path(__file__).resolve().parents[1]
SRC_DIR = ROOT / "src"
if str(SRC_DIR) not in sys.path:
    sys.path.insert(0, str(SRC_DIR))

# NOTE: these imports must stay *below* the sys.path tweak above, otherwise
# the `renunney` package may not be resolvable in a fresh checkout.
import renunney.track1_api as api
import renunney.track1_dataset as ds
import renunney.track1_reference as ref
||||||
|
|
||||||
|
|
||||||
|
def test_generate_extinction_dataset_writes_expected_files(tmp_path: Path):
    """A 2x2 grid with one run per treatment yields four treatments on disk."""
    out_dir = tmp_path / "dataset"
    base = ref.Track1Parameters(K=500, N0=20, n=1, u=0.001, R=10.0, T=10, epochs=1)
    payload = ds.generate_extinction_dataset(
        params=base,
        runs=1,
        seed_start=1,
        dataset_dir=out_dir,
        grid={"N0": [20, 500], "u": [0.001, 0.005]},
    )
    assert payload["treatment_count"] == 4
    assert payload["run_row_count"] == 4
    # Every advertised output path must exist after generation.
    for path_key in ("generation_rows_path", "run_rows_path", "treatments_path"):
        assert Path(payload[path_key]).exists()
    # The metadata written to disk must agree with the returned payload.
    on_disk = json.loads((out_dir / "metadata.json").read_text(encoding="utf-8"))
    assert on_disk["treatment_count"] == 4
||||||
|
|
||||||
|
|
||||||
|
def test_run_config_extinction_dataset_mode(tmp_path: Path):
    """run_config in extinction_dataset mode sweeps the grid and writes JSONL."""
    out_dir = tmp_path / "dataset"
    cfg = api.Track1RunConfig(
        mode="extinction_dataset",
        K=500,
        N0=20,
        n=1,
        u=0.001,
        R=10.0,
        T=10,
        epochs=1,
        runs=1,
        seed=1,
        dataset_dir=str(out_dir),
        grid={"u": [0.001, 0.005]},
    )
    payload = api.run_config(cfg)
    assert payload["mode"] == "extinction_dataset"
    # Two grid values for u -> two treatments.
    assert payload["treatment_count"] == 2
    parameters = payload["parameters"]
    assert parameters["u"] == 0.001
    assert parameters["M"] == 1.0
    # Both JSONL outputs land in the requested dataset directory.
    for filename in ("run_rows.jsonl", "generation_rows.jsonl"):
        assert (out_dir / filename).exists()
||||||
Loading…
Reference in New Issue