Migrate Track 1 runner and API boundary into renunney

This commit is contained in:
Codex 2026-04-11 06:24:12 -04:00
parent a880d9b99d
commit a6d1326165
8 changed files with 482 additions and 11 deletions

View File

@ -2,6 +2,7 @@ PYTHON := python3
REPO_ROOT := $(abspath .) REPO_ROOT := $(abspath .)
LEGACY_ROOT := $(REPO_ROOT)/../collaborations/to_ptbc/evc/cost_of_substitution LEGACY_ROOT := $(REPO_ROOT)/../collaborations/to_ptbc/evc/cost_of_substitution
ORCH := $(REPO_ROOT)/scripts/run_orchestration.py ORCH := $(REPO_ROOT)/scripts/run_orchestration.py
TRACK1 := $(REPO_ROOT)/scripts/run_track1.py
DB := $(REPO_ROOT)/runs/state/cos-orch.sqlite DB := $(REPO_ROOT)/runs/state/cos-orch.sqlite
RESULT_ROOT := $(REPO_ROOT)/runs/results RESULT_ROOT := $(REPO_ROOT)/runs/results
@ -13,7 +14,7 @@ FIG1_M05 := $(REPO_ROOT)/config/track1_figure1_paper_M_0_5.json
FIG1_M10 := $(REPO_ROOT)/config/track1_figure1_paper_M_1_0.json FIG1_M10 := $(REPO_ROOT)/config/track1_figure1_paper_M_1_0.json
FIG1_M100 := $(REPO_ROOT)/config/track1_figure1_paper_M_10_0.json FIG1_M100 := $(REPO_ROOT)/config/track1_figure1_paper_M_10_0.json
.PHONY: help init doctor list-jobs run-one run-loop run-loop-one collate-figure1 \ .PHONY: help init doctor list-jobs run-one run-loop run-loop-one collate-figure1 track1-sim-smoke \
submit-figure1-m005 submit-figure1-m025 submit-figure1-m05 submit-figure1-m10 submit-figure1-m100 \ submit-figure1-m005 submit-figure1-m025 submit-figure1-m05 submit-figure1-m10 submit-figure1-m100 \
submit-all-figure1 status results-tree submit-all-figure1 status results-tree
@ -22,6 +23,7 @@ help:
@echo " init Create run directories and initialize the SQLite registry" @echo " init Create run directories and initialize the SQLite registry"
@echo " doctor Show key paths and verify local orchestration and legacy backend paths" @echo " doctor Show key paths and verify local orchestration and legacy backend paths"
@echo " list-jobs List jobs in the local registry" @echo " list-jobs List jobs in the local registry"
@echo " track1-sim-smoke Run one local Track 1 simulation through renunney runner"
@echo " run-one Claim and run one queued job" @echo " run-one Claim and run one queued job"
@echo " run-loop Run worker loop until queue empty" @echo " run-loop Run worker loop until queue empty"
@echo " run-loop-one Run exactly one queued job through the worker loop" @echo " run-loop-one Run exactly one queued job through the worker loop"
@ -43,15 +45,20 @@ doctor:
@echo "REPO_ROOT=$(REPO_ROOT)" @echo "REPO_ROOT=$(REPO_ROOT)"
@echo "LEGACY_ROOT=$(LEGACY_ROOT)" @echo "LEGACY_ROOT=$(LEGACY_ROOT)"
@echo "ORCH=$(ORCH)" @echo "ORCH=$(ORCH)"
@echo "TRACK1=$(TRACK1)"
@echo "DB=$(DB)" @echo "DB=$(DB)"
@echo "RESULT_ROOT=$(RESULT_ROOT)" @echo "RESULT_ROOT=$(RESULT_ROOT)"
@echo "SCRATCH_ROOT=$(SCRATCH_ROOT)" @echo "SCRATCH_ROOT=$(SCRATCH_ROOT)"
test -f $(ORCH) test -f $(ORCH)
test -f $(TRACK1)
test -d $(LEGACY_ROOT)/python test -d $(LEGACY_ROOT)/python
list-jobs: list-jobs:
$(PYTHON) $(ORCH) list --db $(DB) $(PYTHON) $(ORCH) list --db $(DB)
track1-sim-smoke:
$(PYTHON) $(TRACK1) --mode simulate --K 5000 --N0 50 --n 1 --u 5e-6 --R 10 --T 40 --epochs 8 --seed 1
run-one: run-one:
$(PYTHON) $(ORCH) run-one --db $(DB) --result-root $(RESULT_ROOT) --scratch-root $(SCRATCH_ROOT) $(PYTHON) $(ORCH) run-one --db $(DB) --result-root $(RESULT_ROOT) --scratch-root $(SCRATCH_ROOT)

View File

@ -13,7 +13,7 @@ This repository is the clean operational wrapper around the current work in:
- [`../collaborations/to_ptbc/evc/cost_of_substitution`](/mnt/CIFS/pengolodh/Docs/Projects/collaborations/to_ptbc/evc/cost_of_substitution) - [`../collaborations/to_ptbc/evc/cost_of_substitution`](/mnt/CIFS/pengolodh/Docs/Projects/collaborations/to_ptbc/evc/cost_of_substitution)
The Track 1 simulation backend still lives there. The orchestration control The Track 1 simulation backend still lives there. The orchestration control
plane is now local to `renunney`. plane and the Track 1 runner/API boundary are now local to `renunney`.
`renunney` provides: `renunney` provides:
@ -21,6 +21,7 @@ plane is now local to `renunney`.
- a stable working directory layout, - a stable working directory layout,
- a local orchestration CLI and library, - a local orchestration CLI and library,
- local paper-scale Figure 1 submission configs, - local paper-scale Figure 1 submission configs,
- a local Track 1 runner and config/API layer,
- a Makefile for common tasks, - a Makefile for common tasks,
- migration notes for pulling code into this repo in stages. - migration notes for pulling code into this repo in stages.
@ -38,6 +39,10 @@ plane is now local to `renunney`.
- local worker scratch and cache files - local worker scratch and cache files
- `src/renunney/` - `src/renunney/`
- future in-repo Python package and migration target - future in-repo Python package and migration target
- `scripts/`
- local CLI entrypoints
- `tests/`
- local verification for migrated boundaries
## Start ## Start
@ -53,6 +58,12 @@ Submit a paper-scale Figure 1 treatment:
make submit-figure1-m10 make submit-figure1-m10
``` ```
Run one local Track 1 simulation through the migrated runner/API boundary:
```bash
make track1-sim-smoke
```
Run one worker loop locally: Run one worker loop locally:
```bash ```bash
@ -70,8 +81,9 @@ make collate-figure1
The current state is split: The current state is split:
- orchestration control plane: local to `renunney` - orchestration control plane: local to `renunney`
- Track 1 runner and config/API layer: local to `renunney`
- Track 1 simulation backend: still in the older `cost_of_substitution` - Track 1 simulation backend: still in the older `cost_of_substitution`
directory directory and imported through the local compatibility layer
This repo is now the clean operational entry point while the simulation code is This repo is now the clean operational entry point while the simulation code is
migrated in later stages. migrated in later stages.

View File

@ -24,15 +24,20 @@ Operational code still lives in:
- `src/renunney/orchestration.py` - `src/renunney/orchestration.py`
- `scripts/run_orchestration.py` - `scripts/run_orchestration.py`
2. Paper-scale Figure 1 submission configs have been copied locally into `config/`. 2. Paper-scale Figure 1 submission configs have been copied locally into `config/`.
3. Keep the Track 1 simulation backend in the legacy path until real multi-host runs are stable. 3. Track 1 runner and API boundary have been migrated locally:
4. Migrate Track 1 runner and API next: - `scripts/run_track1.py`
- `python/run_track1.py` - `src/renunney/track1_api.py`
- `python/track1_api.py` 4. Keep the Track 1 simulation backend in the legacy path until real multi-host runs are stable.
5. Migrate the Track 1 simulation core after the runner path is stable: 5. Migrate the Track 1 simulation core after the runner path is stable:
- `python/track1_reference.py` - `python/track1_reference.py`
- `python/track1_threshold.py` - `python/track1_threshold.py`
- `python/track1_analysis.py` - `python/track1_analysis.py`
6. Migrate docs and example configs last, after path references are updated. 6. Migrate report, dataset, fit, and orchestration-adjacent Track 1 modules after the kernel boundary is stable:
- `python/track1_report.py`
- `python/track1_dataset.py`
- `python/track1_fit.py`
- `python/track1_extinction.py`
7. Migrate docs and example configs last, after path references are updated.
## Constraint ## Constraint

View File

@ -14,6 +14,12 @@ Check paths:
make doctor make doctor
``` ```
Run one local Track 1 simulation through the in-repo runner:
```bash
make track1-sim-smoke
```
Submit a Figure 1 treatment: Submit a Figure 1 treatment:
```bash ```bash
@ -41,6 +47,7 @@ make status
## Current Assumption ## Current Assumption
The Makefile now drives the local orchestration code in `renunney`, while the The Makefile now drives the local orchestration code in `renunney`, while the
simulation backend is still imported from the legacy `cost_of_substitution` Track 1 runner/API boundary is also local to `renunney`. The simulation kernel
directory. The paper-scale Figure 1 configs used for submission are now local is still imported from the legacy `cost_of_substitution` directory through the
to `renunney/config`. compatibility layer in `src/renunney/legacy.py`. The paper-scale Figure 1
configs used for submission are now local to `renunney/config`.

105
scripts/run_track1.py Normal file
View File

@ -0,0 +1,105 @@
"""
run_track1.py
Local Track 1 runner entrypoint for renunney.
"""
from __future__ import annotations
import argparse
from pathlib import Path
import sys
REPO_ROOT = Path(__file__).resolve().parents[1]
SRC_DIR = REPO_ROOT / "src"
if str(SRC_DIR) not in sys.path:
sys.path.insert(0, str(SRC_DIR))
from renunney.track1_api import Track1RunConfig, load_config, run_config, save_payload
def build_parser() -> argparse.ArgumentParser:
    """Construct the CLI parser for the Track 1 runner.

    The flags mirror the fields of ``Track1RunConfig``; ``--config`` lets a
    JSON file stand in for individual flags.

    Returns:
        A configured ``argparse.ArgumentParser``.
    """
    modes = [
        "simulate",
        "report",
        "threshold",
        "search",
        "loci_regression",
        "extinction_dataset",
        "extinction_fit",
    ]
    p = argparse.ArgumentParser(description="Run Track 1 cost-of-substitution experiments.")
    p.add_argument("--mode", choices=modes, default="simulate")
    p.add_argument("--config", type=str, default=None)
    # Core simulation parameters.
    p.add_argument("--K", type=int, default=5000)
    p.add_argument("--N0", type=int, default=500)
    p.add_argument("--n", type=int, default=1)
    p.add_argument("--u", type=float, default=5.0e-6)
    p.add_argument("--M", type=float, default=None)
    p.add_argument("--R", type=float, default=10.0)
    p.add_argument("--T", type=int, default=300)
    p.add_argument("--epochs", type=int, default=8)
    p.add_argument("--p", type=float, default=0.5)
    p.add_argument("--a-max", dest="a_max", type=int, default=None)
    p.add_argument("--seed", type=int, default=0)
    p.add_argument("--runs", type=int, default=20)
    # Threshold-sweep controls (explicit --t-values overrides the range).
    p.add_argument("--t-start", type=int, default=50)
    p.add_argument("--t-stop", type=int, default=500)
    p.add_argument("--t-step", type=int, default=10)
    p.add_argument("--t-values", type=str, default=None)
    p.add_argument("--loci-values", type=str, default=None)
    # Execution and output destinations.
    p.add_argument("--cache-path", type=str, default=None)
    p.add_argument("--jobs", type=int, default=1)
    p.add_argument("--report-dir", type=str, default=None)
    p.add_argument("--dataset-dir", type=str, default=None)
    p.add_argument("--run-rows-path", type=str, default=None)
    p.add_argument("--output", type=str, default=None)
    return p
def config_from_args(args: argparse.Namespace) -> Track1RunConfig:
    """Translate parsed CLI flags into a ``Track1RunConfig``.

    Comma-separated ``--t-values`` / ``--loci-values`` strings become float/int
    lists.  When ``--M`` is supplied it takes precedence: ``u`` is passed as
    None so the config derives the per-locus rate from M.
    """
    def _csv(raw, cast):
        # None/empty input means "not supplied"; blank segments are dropped.
        if not raw:
            return None
        return [cast(piece.strip()) for piece in raw.split(",") if piece.strip()]

    return Track1RunConfig(
        mode=args.mode,
        K=args.K,
        N0=args.N0,
        n=args.n,
        u=None if args.M is not None else args.u,
        M=args.M,
        R=args.R,
        T=args.T,
        epochs=args.epochs,
        p=args.p,
        a_max=args.a_max,
        seed=args.seed,
        runs=args.runs,
        t_start=args.t_start,
        t_stop=args.t_stop,
        t_step=args.t_step,
        t_values=_csv(args.t_values, float),
        loci_values=_csv(args.loci_values, int),
        cache_path=args.cache_path,
        jobs=args.jobs,
        report_dir=args.report_dir,
        dataset_dir=args.dataset_dir,
        run_rows_path=args.run_rows_path,
    )
def main() -> int:
    """CLI entrypoint: resolve a config, run it, and emit the JSON payload.

    A ``--config`` file takes precedence over the individual flags.  The
    payload is always printed to stdout; ``--output`` additionally persists it
    to disk via ``save_payload``.

    Returns:
        Process exit status (0 on success).
    """
    import json

    args = build_parser().parse_args()
    config = load_config(args.config) if args.config else config_from_args(args)
    payload = run_config(config)
    if args.output:
        save_payload(payload, Path(args.output))
    print(json.dumps(payload, indent=2, sort_keys=True))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@ -18,6 +18,7 @@ from .orchestration import (
submit_job_manifest, submit_job_manifest,
submit_track1_figure1_jobs, submit_track1_figure1_jobs,
) )
from .track1_api import Track1RunConfig, config_from_mapping, load_config, run_config, save_payload
__all__ = [ __all__ = [
"ClaimedJob", "ClaimedJob",
@ -39,4 +40,9 @@ __all__ = [
"run_worker_loop", "run_worker_loop",
"submit_job_manifest", "submit_job_manifest",
"submit_track1_figure1_jobs", "submit_track1_figure1_jobs",
"Track1RunConfig",
"config_from_mapping",
"load_config",
"run_config",
"save_payload",
] ]

289
src/renunney/track1_api.py Normal file
View File

@ -0,0 +1,289 @@
"""
track1_api.py
Local Track 1 API boundary for renunney.
This stage keeps the simulation backend in the legacy tree while moving the
public config/runner contract into the clean repo.
"""
from __future__ import annotations
import json
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any, Optional
from .legacy import ensure_legacy_python_path
ensure_legacy_python_path()
from track1_analysis import summarize_tracking, sweep_number_of_loci
from track1_dataset import generate_extinction_dataset
from track1_fit import class_balance, fit_payload_from_jsonl, load_jsonl
from track1_reference import Track1Parameters, simulate_run
from track1_report import generate_report_bundle
from track1_threshold import evaluate_threshold_candidate, search_threshold_over_candidates
@dataclass(frozen=True, init=False)
class Track1RunConfig:
    """Declarative description of one Track 1 run.

    Frozen dataclass with a hand-written ``__init__`` so the convenience
    parameter ``M`` can stand in for the per-locus mutation rate ``u``
    (related by M = 2 * K * u, see the ``M`` property).
    """

    mode: str = "simulate"
    K: int = 5000
    N0: int = 500
    n: int = 1
    u: float = 5.0e-6
    R: float = 10.0
    T: int = 300
    epochs: int = 8
    p: float = 0.5
    a_max: Optional[int] = None
    seed: int = 0
    runs: int = 20
    t_start: int = 50
    t_stop: int = 500
    t_step: int = 10
    t_values: Optional[list[float]] = None
    loci_values: Optional[list[int]] = None
    cache_path: Optional[str] = None
    jobs: int = 1
    report_dir: Optional[str] = None
    dataset_dir: Optional[str] = None
    run_rows_path: Optional[str] = None
    grid: Optional[dict[str, list[Any]]] = None

    def __init__(
        self,
        mode: str = "simulate",
        K: int = 5000,
        N0: int = 500,
        n: int = 1,
        u: float | None = 5.0e-6,
        R: float = 10.0,
        T: int = 300,
        epochs: int = 8,
        p: float = 0.5,
        a_max: Optional[int] = None,
        seed: int = 0,
        runs: int = 20,
        t_start: int = 50,
        t_stop: int = 500,
        t_step: int = 10,
        t_values: Optional[list[float]] = None,
        loci_values: Optional[list[int]] = None,
        cache_path: Optional[str] = None,
        jobs: int = 1,
        report_dir: Optional[str] = None,
        dataset_dir: Optional[str] = None,
        run_rows_path: Optional[str] = None,
        grid: Optional[dict[str, list[Any]]] = None,
        M: float | None = None,
    ) -> None:
        """Coerce and install all fields; derive ``u`` from ``M`` when needed.

        Raises:
            ValueError: if neither ``u`` nor ``M`` is provided.
        """
        if u is None:
            if M is None:
                raise ValueError("Track1RunConfig requires u, or convenience M to derive u.")
            # Inverse of the M property: u = M / (2K).  Uses the raw K value.
            u = float(M / (2.0 * K))
        coerced = {
            "mode": mode,
            "K": int(K),
            "N0": int(N0),
            "n": int(n),
            "u": float(u),
            "R": float(R),
            "T": int(T),
            "epochs": int(epochs),
            "p": float(p),
            "a_max": a_max,
            "seed": int(seed),
            "runs": int(runs),
            "t_start": int(t_start),
            "t_stop": int(t_stop),
            "t_step": int(t_step),
            "t_values": t_values,
            "loci_values": loci_values,
            "cache_path": cache_path,
            "jobs": int(jobs),
            "report_dir": report_dir,
            "dataset_dir": dataset_dir,
            "run_rows_path": run_rows_path,
            "grid": grid,
        }
        # The frozen dataclass blocks normal assignment, so fields are
        # installed through object.__setattr__.
        for field_name, field_value in coerced.items():
            object.__setattr__(self, field_name, field_value)

    @property
    def M(self) -> float:
        """Convenience scaled mutation rate, M = 2 * K * u."""
        return float(2.0 * self.K * self.u)

    def to_parameters(self) -> Track1Parameters:
        """Project this config onto the legacy ``Track1Parameters`` dataclass."""
        return Track1Parameters(
            K=self.K,
            N0=self.N0,
            n=self.n,
            u=self.u,
            R=self.R,
            T=self.T,
            epochs=self.epochs,
            p=self.p,
            a_max=self.a_max,
        )
def parameter_payload(params: Track1Parameters) -> dict[str, Any]:
    """Serialize ``Track1Parameters`` to a plain dict, adding the derived M."""
    return {**asdict(params), "M": params.M}
def config_from_mapping(mapping: dict[str, Any]) -> Track1RunConfig:
    """Build a ``Track1RunConfig`` from a plain mapping (e.g. parsed JSON).

    Keys that are not dataclass fields are dropped.  ``M`` is not a field
    (it is an ``__init__`` convenience), so it is forwarded explicitly — but
    only when the mapping does not already supply ``u``.
    """
    known = Track1RunConfig.__dataclass_fields__
    kwargs = {name: value for name, value in mapping.items() if name in known}
    if "u" not in kwargs and "M" in mapping:
        kwargs["M"] = mapping["M"]
    return Track1RunConfig(**kwargs)
def load_config(path: str | Path) -> Track1RunConfig:
    """Read a UTF-8 JSON file and convert it into a ``Track1RunConfig``."""
    text = Path(path).read_text(encoding="utf-8")
    return config_from_mapping(json.loads(text))
def save_payload(payload: dict[str, Any], path: str | Path) -> None:
    """Write ``payload`` as pretty-printed, key-sorted JSON with a trailing newline.

    Parent directories are created on demand.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(payload, indent=2, sort_keys=True)
    target.write_text(rendered + "\n", encoding="utf-8")
def run_config(config: Track1RunConfig) -> dict[str, Any]:
    """Execute one Track 1 run described by ``config`` and return a JSON-safe payload.

    Dispatches on ``config.mode`` to the backend helpers imported from the
    legacy tree at module load (``simulate_run``, ``generate_report_bundle``,
    ...).  Every branch returns a dict carrying at least ``mode`` and
    ``parameters``; extra keys depend on the backend helper's own payload.

    Raises:
        ValueError: if ``config.mode`` is not one of the supported modes.
    """
    params = config.to_parameters()
    # Candidate T grid for the threshold/search/loci modes: an explicit
    # t_values list wins; otherwise an inclusive arithmetic range
    # [t_start, t_stop] stepped by t_step.  Computed up front because several
    # branches below consume it.
    candidate_values = (
        [float(value) for value in config.t_values]
        if config.t_values is not None
        else list(range(config.t_start, config.t_stop + 1, config.t_step))
    )
    if config.mode == "simulate":
        # Single seeded run; summaries holds one record per recorded generation.
        summaries = simulate_run(params, seed=config.seed)
        return {
            "mode": "simulate",
            "parameters": parameter_payload(params),
            "generations_recorded": len(summaries),
            # An empty summary list is reported as extinction.
            "extinct": bool(summaries[-1].extinct) if summaries else True,
            "tracking_summary": asdict(summarize_tracking(summaries)),
            "final_summary": asdict(summaries[-1]) if summaries else None,
        }
    if config.mode == "report":
        # The report bundle is its own payload; fall back to a fixed /tmp dir.
        report_dir = config.report_dir or "/tmp/track1-report"
        return generate_report_bundle(
            params=params,
            runs=config.runs,
            seed_start=config.seed,
            report_dir=report_dir,
        )
    if config.mode == "extinction_dataset":
        dataset_dir = config.dataset_dir or "/tmp/track1-extinction-dataset"
        metadata = generate_extinction_dataset(
            params=params,
            runs=config.runs,
            seed_start=config.seed,
            dataset_dir=dataset_dir,
            grid=config.grid,
        )
        # Dataset metadata keys are merged directly into the payload.
        return {
            "mode": "extinction_dataset",
            "parameters": parameter_payload(params),
            **metadata,
        }
    if config.mode == "extinction_fit":
        run_rows_path = config.run_rows_path
        if run_rows_path is None:
            # Default to the run_rows.jsonl produced by extinction_dataset.
            dataset_dir = config.dataset_dir or "/tmp/track1-extinction-dataset"
            run_rows_path = str(Path(dataset_dir) / "run_rows.jsonl")
        try:
            payload = fit_payload_from_jsonl(run_rows_path)
            fit_status = "ok"
            fit_error = None
        except ValueError as exc:
            # Fit can fail (e.g. degenerate outcomes); degrade to a class-balance
            # summary instead of crashing, and record the failure reason.
            rows = load_jsonl(run_rows_path)
            payload = {
                "run_rows_path": str(Path(run_rows_path)),
                "model": None,
                "summary": asdict(class_balance(rows)),
            }
            fit_status = "insufficient_outcome_variation"
            fit_error = str(exc)
        return {
            "mode": "extinction_fit",
            "parameters": parameter_payload(params),
            "fit_status": fit_status,
            "fit_error": fit_error,
            **payload,
        }
    if config.mode == "threshold":
        # Evaluate a single candidate: config.T doubles as the candidate value.
        result = evaluate_threshold_candidate(
            params=params,
            T_value=float(config.T),
            runs=config.runs,
            seed_start=config.seed,
            cache_path=config.cache_path,
            jobs=config.jobs,
        )
        return {
            "mode": "threshold",
            "parameters": parameter_payload(params),
            "result": {
                "threshold_T": result.threshold_T,
                "baseline_check": asdict(result.baseline_check),
                "check_1_02": asdict(result.check_1_02),
                "check_1_05": asdict(result.check_1_05),
                "check_1_10": asdict(result.check_1_10),
                # retest_check is optional on the backend result object.
                "retest_check": asdict(result.retest_check) if result.retest_check else None,
            },
        }
    if config.mode == "search":
        # Sweep the candidate grid; the backend may return None if no candidate
        # satisfies the threshold criteria.
        result = search_threshold_over_candidates(
            params=params,
            candidate_T_values=candidate_values,
            runs=config.runs,
            seed_start=config.seed,
            cache_path=config.cache_path,
            jobs=config.jobs,
        )
        return {
            "mode": "search",
            "parameters": parameter_payload(params),
            "candidates": candidate_values,
            "result": None
            if result is None
            else {
                "threshold_T": result.threshold_T,
                "baseline_check": asdict(result.baseline_check),
                "check_1_02": asdict(result.check_1_02),
                "check_1_05": asdict(result.check_1_05),
                "check_1_10": asdict(result.check_1_10),
                "retest_check": asdict(result.retest_check) if result.retest_check else None,
            },
        }
    if config.mode == "loci_regression":
        # Default loci sweep 1..7 when none were requested.
        loci_values = config.loci_values if config.loci_values is not None else [1, 2, 3, 4, 5, 6, 7]
        sweep = sweep_number_of_loci(
            params=params,
            loci_values=loci_values,
            candidate_T_values=candidate_values,
            runs=config.runs,
            seed_start=config.seed,
            cache_path=config.cache_path,
            jobs=config.jobs,
        )
        return {
            "mode": "loci_regression",
            "parameters": parameter_payload(params),
            "loci_values": loci_values,
            "candidates": candidate_values,
            "rows": [asdict(row) for row in sweep.rows],
            # sweep.fit may be None when the regression could not be fitted.
            "fit": None if sweep.fit is None else asdict(sweep.fit),
        }
    raise ValueError(f"Unsupported Track 1 mode: {config.mode}")

40
tests/test_track1_api.py Normal file
View File

@ -0,0 +1,40 @@
import json
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
SRC_DIR = ROOT / "src"
if str(SRC_DIR) not in sys.path:
sys.path.insert(0, str(SRC_DIR))
from renunney import track1_api as api
def test_run_config_simulate_mode_returns_contract():
    """A simulate-mode run yields the stable payload contract keys."""
    cfg = api.Track1RunConfig(mode="simulate", K=5000, N0=20, n=1, u=5e-6, R=10.0, T=20, seed=1)
    result = api.run_config(cfg)
    assert result["mode"] == "simulate"
    reported = result["parameters"]
    assert reported["u"] == 5e-6
    assert reported["M"] == 0.05
    for key in ("final_summary", "tracking_summary"):
        assert key in result
def test_config_roundtrip_from_mapping_and_file(tmp_path: Path):
    """config_from_mapping and load_config agree on the same raw mapping."""
    raw = {
        "mode": "search",
        "K": 5000,
        "N0": 20,
        "n": 1,
        "u": 5e-6,
        "R": 10.0,
        "T": 20,
        "runs": 1,
        "jobs": 1,
        "t_values": [5, 10],
    }
    expected = api.config_from_mapping(raw)
    config_file = tmp_path / "track1.json"
    config_file.write_text(json.dumps(raw), encoding="utf-8")
    assert api.load_config(config_file) == expected