From a6d1326165e26d5e53a3e84a22860fda97058edf Mon Sep 17 00:00:00 2001 From: Codex Date: Sat, 11 Apr 2026 06:24:12 -0400 Subject: [PATCH] Migrate Track 1 runner and API boundary into renunney --- Makefile | 9 +- README.md | 16 +- docs/MIGRATION.md | 15 +- docs/WORKFLOW.md | 13 +- scripts/run_track1.py | 105 ++++++++++++++ src/renunney/__init__.py | 6 + src/renunney/track1_api.py | 289 +++++++++++++++++++++++++++++++++++++ tests/test_track1_api.py | 40 +++++ 8 files changed, 482 insertions(+), 11 deletions(-) create mode 100644 scripts/run_track1.py create mode 100644 src/renunney/track1_api.py create mode 100644 tests/test_track1_api.py diff --git a/Makefile b/Makefile index f1f6c1a..9a03c5d 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,7 @@ PYTHON := python3 REPO_ROOT := $(abspath .) LEGACY_ROOT := $(REPO_ROOT)/../collaborations/to_ptbc/evc/cost_of_substitution ORCH := $(REPO_ROOT)/scripts/run_orchestration.py +TRACK1 := $(REPO_ROOT)/scripts/run_track1.py DB := $(REPO_ROOT)/runs/state/cos-orch.sqlite RESULT_ROOT := $(REPO_ROOT)/runs/results @@ -13,7 +14,7 @@ FIG1_M05 := $(REPO_ROOT)/config/track1_figure1_paper_M_0_5.json FIG1_M10 := $(REPO_ROOT)/config/track1_figure1_paper_M_1_0.json FIG1_M100 := $(REPO_ROOT)/config/track1_figure1_paper_M_10_0.json -.PHONY: help init doctor list-jobs run-one run-loop run-loop-one collate-figure1 \ +.PHONY: help init doctor list-jobs run-one run-loop run-loop-one collate-figure1 track1-sim-smoke \ submit-figure1-m005 submit-figure1-m025 submit-figure1-m05 submit-figure1-m10 submit-figure1-m100 \ submit-all-figure1 status results-tree @@ -22,6 +23,7 @@ help: @echo " init Create run directories and initialize the SQLite registry" @echo " doctor Show key paths and verify local orchestration and legacy backend paths" @echo " list-jobs List jobs in the local registry" + @echo " track1-sim-smoke Run one local Track 1 simulation through renunney runner" @echo " run-one Claim and run one queued job" @echo " run-loop Run worker loop until 
queue empty" @echo " run-loop-one Run exactly one queued job through the worker loop" @@ -43,15 +45,20 @@ doctor: @echo "REPO_ROOT=$(REPO_ROOT)" @echo "LEGACY_ROOT=$(LEGACY_ROOT)" @echo "ORCH=$(ORCH)" + @echo "TRACK1=$(TRACK1)" @echo "DB=$(DB)" @echo "RESULT_ROOT=$(RESULT_ROOT)" @echo "SCRATCH_ROOT=$(SCRATCH_ROOT)" test -f $(ORCH) + test -f $(TRACK1) test -d $(LEGACY_ROOT)/python list-jobs: $(PYTHON) $(ORCH) list --db $(DB) +track1-sim-smoke: + $(PYTHON) $(TRACK1) --mode simulate --K 5000 --N0 50 --n 1 --u 5e-6 --R 10 --T 40 --epochs 8 --seed 1 + run-one: $(PYTHON) $(ORCH) run-one --db $(DB) --result-root $(RESULT_ROOT) --scratch-root $(SCRATCH_ROOT) diff --git a/README.md b/README.md index 3770553..15fbadb 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ This repository is the clean operational wrapper around the current work in: - [`../collaborations/to_ptbc/evc/cost_of_substitution`](/mnt/CIFS/pengolodh/Docs/Projects/collaborations/to_ptbc/evc/cost_of_substitution) The Track 1 simulation backend still lives there. The orchestration control -plane is now local to `renunney`. +plane and the Track 1 runner/API boundary are now local to `renunney`. `renunney` provides: @@ -21,6 +21,7 @@ plane is now local to `renunney`. - a stable working directory layout, - a local orchestration CLI and library, - local paper-scale Figure 1 submission configs, +- a local Track 1 runner and config/API layer, - a Makefile for common tasks, - migration notes for pulling code into this repo in stages. @@ -38,6 +39,10 @@ plane is now local to `renunney`. 
- local worker scratch and cache files - `src/renunney/` - future in-repo Python package and migration target +- `scripts/` + - local CLI entrypoints +- `tests/` + - local verification for migrated boundaries ## Start @@ -53,6 +58,12 @@ Submit a paper-scale Figure 1 treatment: make submit-figure1-m10 ``` +Run one local Track 1 simulation through the migrated runner/API boundary: + +```bash +make track1-sim-smoke +``` + Run one worker loop locally: ```bash @@ -70,8 +81,9 @@ make collate-figure1 The current state is split: - orchestration control plane: local to `renunney` +- Track 1 runner and config/API layer: local to `renunney` - Track 1 simulation backend: still in the older `cost_of_substitution` - directory + directory and imported through the local compatibility layer This repo is now the clean operational entry point while the simulation code is migrated in later stages. diff --git a/docs/MIGRATION.md b/docs/MIGRATION.md index 4992447..070f008 100644 --- a/docs/MIGRATION.md +++ b/docs/MIGRATION.md @@ -24,15 +24,20 @@ Operational code still lives in: - `src/renunney/orchestration.py` - `scripts/run_orchestration.py` 2. Paper-scale Figure 1 submission configs have been copied locally into `config/`. -3. Keep the Track 1 simulation backend in the legacy path until real multi-host runs are stable. -4. Migrate Track 1 runner and API next: - - `python/run_track1.py` - - `python/track1_api.py` +3. Track 1 runner and API boundary have been migrated locally: + - `scripts/run_track1.py` + - `src/renunney/track1_api.py` +4. Keep the Track 1 simulation backend in the legacy path until real multi-host runs are stable. 5. Migrate the Track 1 simulation core after the runner path is stable: - `python/track1_reference.py` - `python/track1_threshold.py` - `python/track1_analysis.py` -6. Migrate docs and example configs last, after path references are updated. +6. 
Migrate report, dataset, fit, and orchestration-adjacent Track 1 modules after the kernel boundary is stable: + - `python/track1_report.py` + - `python/track1_dataset.py` + - `python/track1_fit.py` + - `python/track1_extinction.py` +7. Migrate docs and example configs last, after path references are updated. ## Constraint diff --git a/docs/WORKFLOW.md b/docs/WORKFLOW.md index b109386..3721502 100644 --- a/docs/WORKFLOW.md +++ b/docs/WORKFLOW.md @@ -14,6 +14,12 @@ Check paths: make doctor ``` +Run one local Track 1 simulation through the in-repo runner: + +```bash +make track1-sim-smoke +``` + Submit a Figure 1 treatment: ```bash @@ -41,6 +47,7 @@ make status ## Current Assumption The Makefile now drives the local orchestration code in `renunney`, while the -simulation backend is still imported from the legacy `cost_of_substitution` -directory. The paper-scale Figure 1 configs used for submission are now local -to `renunney/config`. +Track 1 runner/API boundary is also local to `renunney`. The simulation kernel +is still imported from the legacy `cost_of_substitution` directory through the +compatibility layer in `src/renunney/legacy.py`. The paper-scale Figure 1 +configs used for submission are now local to `renunney/config`. diff --git a/scripts/run_track1.py b/scripts/run_track1.py new file mode 100644 index 0000000..6fbb4d0 --- /dev/null +++ b/scripts/run_track1.py @@ -0,0 +1,105 @@ +""" +run_track1.py + +Local Track 1 runner entrypoint for renunney. 
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the Track 1 runner.

    Options are registered from a table so the flag order shown by ``--help``
    is fixed in one place. Apart from ``--config`` and ``--output`` (consumed
    by ``main``), each option is forwarded to ``Track1RunConfig`` by
    ``config_from_args``.
    """
    cli = argparse.ArgumentParser(description="Run Track 1 cost-of-substitution experiments.")
    option_table = [
        (
            "--mode",
            {
                "choices": [
                    "simulate",
                    "report",
                    "threshold",
                    "search",
                    "loci_regression",
                    "extinction_dataset",
                    "extinction_fit",
                ],
                "default": "simulate",
            },
        ),
        ("--config", {"type": str, "default": None}),
        ("--K", {"type": int, "default": 5000}),
        ("--N0", {"type": int, "default": 500}),
        ("--n", {"type": int, "default": 1}),
        ("--u", {"type": float, "default": 5.0e-6}),
        ("--M", {"type": float, "default": None}),
        ("--R", {"type": float, "default": 10.0}),
        ("--T", {"type": int, "default": 300}),
        ("--epochs", {"type": int, "default": 8}),
        ("--p", {"type": float, "default": 0.5}),
        ("--a-max", {"dest": "a_max", "type": int, "default": None}),
        ("--seed", {"type": int, "default": 0}),
        ("--runs", {"type": int, "default": 20}),
        ("--t-start", {"type": int, "default": 50}),
        ("--t-stop", {"type": int, "default": 500}),
        ("--t-step", {"type": int, "default": 10}),
        # Comma-separated lists stay raw strings here; config_from_args splits them.
        ("--t-values", {"type": str, "default": None}),
        ("--loci-values", {"type": str, "default": None}),
        ("--cache-path", {"type": str, "default": None}),
        ("--jobs", {"type": int, "default": 1}),
        ("--report-dir", {"type": str, "default": None}),
        ("--dataset-dir", {"type": str, "default": None}),
        ("--run-rows-path", {"type": str, "default": None}),
        ("--output", {"type": str, "default": None}),
    ]
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)
    return cli


def _parse_csv(raw, cast):
    """Split a comma-separated option value and cast each non-blank piece.

    Returns ``None`` when the option was not supplied (or is empty).
    """
    if not raw:
        return None
    pieces = (piece.strip() for piece in raw.split(","))
    return [cast(piece) for piece in pieces if piece]


def config_from_args(args: argparse.Namespace) -> Track1RunConfig:
    """Translate parsed command-line arguments into a ``Track1RunConfig``.

    When ``--M`` is given, ``u`` is passed as ``None`` so the config derives the
    mutation rate from ``M``; otherwise ``--u`` is used as-is.
    """
    # Loci are parsed before T values so a malformed pair reports the same
    # error first as it always has.
    loci = _parse_csv(args.loci_values, int)
    t_candidates = _parse_csv(args.t_values, float)

    passthrough = (
        "mode", "K", "N0", "n", "M", "R", "T", "epochs", "p", "a_max",
        "seed", "runs", "t_start", "t_stop", "t_step", "cache_path",
        "jobs", "report_dir", "dataset_dir", "run_rows_path",
    )
    settings = {name: getattr(args, name) for name in passthrough}
    settings["u"] = None if args.M is not None else args.u
    settings["t_values"] = t_candidates
    settings["loci_values"] = loci
    return Track1RunConfig(**settings)


def main() -> int:
    """Run one Track 1 job from the command line and print its JSON payload.

    A ``--config`` file, when given, replaces every other option. The payload
    is always printed to stdout and additionally written to ``--output`` when
    that option is set. Returns the process exit code (always 0 on success).
    """
    import json

    args = build_parser().parse_args()
    config = load_config(args.config) if args.config else config_from_args(args)

    payload = run_config(config)
    # Render before saving so an unserializable payload fails before any file is written.
    rendered = json.dumps(payload, indent=2, sort_keys=True)
    if args.output:
        save_payload(payload, Path(args.output))
    print(rendered)
    return 0
# Sentinel default for ``u`` so the constructor can tell "u was omitted" apart
# from an explicit ``u=None``. Without it, an M-only call silently kept the
# built-in 5.0e-6 rate and ignored M.
_U_NOT_GIVEN: Any = object()


@dataclass(frozen=True, init=False)
class Track1RunConfig:
    """Immutable description of one Track 1 run.

    The mutation rate can be given directly as ``u`` or through the convenience
    argument ``M``, from which ``u = M / (2 * K)`` is derived. Precedence:

    * ``u`` given (not ``None``): ``u`` is used and ``M`` is ignored.
    * ``u`` omitted or ``None``, ``M`` given: ``u`` is derived from ``M``.
    * both omitted: ``u`` falls back to ``5.0e-6``.
    * ``u=None`` passed explicitly without ``M``: ``ValueError``.

    ``M`` is not stored; it is recomputed from ``K`` and ``u`` by the ``M``
    property. Numeric scalar fields are coerced with ``int``/``float``;
    ``a_max``, the list fields, the path fields and ``grid`` are stored as
    given.
    """

    mode: str = "simulate"
    K: int = 5000
    N0: int = 500
    n: int = 1
    u: float = 5.0e-6
    R: float = 10.0
    T: int = 300
    epochs: int = 8
    p: float = 0.5
    a_max: Optional[int] = None
    seed: int = 0
    runs: int = 20
    t_start: int = 50
    t_stop: int = 500
    t_step: int = 10
    t_values: Optional[list[float]] = None
    loci_values: Optional[list[int]] = None
    cache_path: Optional[str] = None
    jobs: int = 1
    report_dir: Optional[str] = None
    dataset_dir: Optional[str] = None
    run_rows_path: Optional[str] = None
    grid: Optional[dict[str, list[Any]]] = None

    def __init__(
        self,
        mode: str = "simulate",
        K: int = 5000,
        N0: int = 500,
        n: int = 1,
        u: float | None = _U_NOT_GIVEN,
        R: float = 10.0,
        T: int = 300,
        epochs: int = 8,
        p: float = 0.5,
        a_max: Optional[int] = None,
        seed: int = 0,
        runs: int = 20,
        t_start: int = 50,
        t_stop: int = 500,
        t_step: int = 10,
        t_values: Optional[list[float]] = None,
        loci_values: Optional[list[int]] = None,
        cache_path: Optional[str] = None,
        jobs: int = 1,
        report_dir: Optional[str] = None,
        dataset_dir: Optional[str] = None,
        run_rows_path: Optional[str] = None,
        grid: Optional[dict[str, list[Any]]] = None,
        M: float | None = None,
    ) -> None:
        """Validate and store the run settings.

        Raises:
            ValueError: if ``u`` is explicitly ``None`` and ``M`` is not given.
        """
        if u is _U_NOT_GIVEN:
            # Only fall back to the built-in rate when M was not supplied;
            # otherwise leave u unset so it is derived from M below.
            u = 5.0e-6 if M is None else None
        if u is None:
            if M is None:
                raise ValueError("Track1RunConfig requires u, or convenience M to derive u.")
            u = float(M / (2.0 * K))
        # The dataclass is frozen, so fields are set through object.__setattr__.
        stored = {
            "mode": mode,
            "K": int(K),
            "N0": int(N0),
            "n": int(n),
            "u": float(u),
            "R": float(R),
            "T": int(T),
            "epochs": int(epochs),
            "p": float(p),
            "a_max": a_max,
            "seed": int(seed),
            "runs": int(runs),
            "t_start": int(t_start),
            "t_stop": int(t_stop),
            "t_step": int(t_step),
            "t_values": t_values,
            "loci_values": loci_values,
            "cache_path": cache_path,
            "jobs": int(jobs),
            "report_dir": report_dir,
            "dataset_dir": dataset_dir,
            "run_rows_path": run_rows_path,
            "grid": grid,
        }
        for field_name, value in stored.items():
            object.__setattr__(self, field_name, value)

    @property
    def M(self) -> float:
        """Return ``2 * K * u``, the inverse of the ``M`` constructor shortcut."""
        return float(2.0 * self.K * self.u)

    def to_parameters(self) -> Track1Parameters:
        """Build the ``Track1Parameters`` holding this run's model settings."""
        return Track1Parameters(
            K=self.K,
            N0=self.N0,
            n=self.n,
            u=self.u,
            R=self.R,
            T=self.T,
            epochs=self.epochs,
            p=self.p,
            a_max=self.a_max,
        )


def parameter_payload(params: Track1Parameters) -> dict[str, Any]:
    """Return ``params`` as a plain dict with ``params.M`` added under ``"M"``.

    ``dataclasses.asdict`` is used, so ``params`` must be a dataclass instance.
    """
    payload = asdict(params)
    payload["M"] = params.M
    return payload


def config_from_mapping(mapping: dict[str, Any]) -> Track1RunConfig:
    """Build a ``Track1RunConfig`` from a plain mapping (e.g. parsed JSON).

    Keys that are not ``Track1RunConfig`` fields are dropped. ``"M"`` is
    forwarded only when the mapping has no usable ``"u"`` (missing or null), so
    an explicit ``u`` always takes precedence over ``M``.
    """
    allowed = set(Track1RunConfig.__dataclass_fields__.keys())
    filtered = {key: value for key, value in mapping.items() if key in allowed}
    if "M" in mapping and filtered.get("u") is None:
        filtered["M"] = mapping["M"]
    return Track1RunConfig(**filtered)
def load_config(path: str | Path) -> Track1RunConfig:
    """Read a UTF-8 JSON file and build a ``Track1RunConfig`` from its contents."""
    raw = json.loads(Path(path).read_text(encoding="utf-8"))
    return config_from_mapping(raw)


def save_payload(payload: dict[str, Any], path: str | Path) -> None:
    """Write ``payload`` to ``path`` as indented, key-sorted JSON.

    Missing parent directories are created, and the file ends with a newline.
    """
    out = Path(path)
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")


def _candidate_t_values(config: Track1RunConfig) -> list[Any]:
    """Return explicit ``t_values`` as floats, else the inclusive start/stop/step grid."""
    if config.t_values is not None:
        return [float(value) for value in config.t_values]
    return list(range(config.t_start, config.t_stop + 1, config.t_step))


def _threshold_result_payload(result: Any) -> dict[str, Any]:
    """Serialize a threshold result (shared by the threshold and search modes)."""
    return {
        "threshold_T": result.threshold_T,
        "baseline_check": asdict(result.baseline_check),
        "check_1_02": asdict(result.check_1_02),
        "check_1_05": asdict(result.check_1_05),
        "check_1_10": asdict(result.check_1_10),
        "retest_check": asdict(result.retest_check) if result.retest_check else None,
    }


def run_config(config: Track1RunConfig) -> dict[str, Any]:
    """Execute the run described by ``config`` and return its result payload.

    ``config.mode`` selects which backend routine is called. The candidate-T
    grid is built only for the modes that consume it (``search`` and
    ``loci_regression``), so an unusable ``t_step`` or ``t_values`` cannot break
    the other modes.

    Raises:
        ValueError: if ``config.mode`` is not one of the supported modes.
    """
    params = config.to_parameters()
    mode = config.mode

    if mode == "simulate":
        summaries = simulate_run(params, seed=config.seed)
        return {
            "mode": "simulate",
            "parameters": parameter_payload(params),
            "generations_recorded": len(summaries),
            # A run that recorded no generations is reported as extinct.
            "extinct": bool(summaries[-1].extinct) if summaries else True,
            "tracking_summary": asdict(summarize_tracking(summaries)),
            "final_summary": asdict(summaries[-1]) if summaries else None,
        }

    if mode == "report":
        report_dir = config.report_dir or "/tmp/track1-report"
        return generate_report_bundle(
            params=params,
            runs=config.runs,
            seed_start=config.seed,
            report_dir=report_dir,
        )

    if mode == "extinction_dataset":
        dataset_dir = config.dataset_dir or "/tmp/track1-extinction-dataset"
        metadata = generate_extinction_dataset(
            params=params,
            runs=config.runs,
            seed_start=config.seed,
            dataset_dir=dataset_dir,
            grid=config.grid,
        )
        return {
            "mode": "extinction_dataset",
            "parameters": parameter_payload(params),
            **metadata,
        }

    if mode == "extinction_fit":
        run_rows_path = config.run_rows_path
        if run_rows_path is None:
            # Default to the rows file inside the (possibly default) dataset directory.
            dataset_dir = config.dataset_dir or "/tmp/track1-extinction-dataset"
            run_rows_path = str(Path(dataset_dir) / "run_rows.jsonl")
        try:
            payload = fit_payload_from_jsonl(run_rows_path)
            fit_status = "ok"
            fit_error = None
        except ValueError as exc:
            # NOTE(review): every ValueError from the fit is reported as
            # insufficient outcome variation; confirm that is the only
            # ValueError fit_payload_from_jsonl can raise.
            rows = load_jsonl(run_rows_path)
            payload = {
                "run_rows_path": str(Path(run_rows_path)),
                "model": None,
                "summary": asdict(class_balance(rows)),
            }
            fit_status = "insufficient_outcome_variation"
            fit_error = str(exc)
        return {
            "mode": "extinction_fit",
            "parameters": parameter_payload(params),
            "fit_status": fit_status,
            "fit_error": fit_error,
            **payload,
        }

    if mode == "threshold":
        result = evaluate_threshold_candidate(
            params=params,
            T_value=float(config.T),
            runs=config.runs,
            seed_start=config.seed,
            cache_path=config.cache_path,
            jobs=config.jobs,
        )
        return {
            "mode": "threshold",
            "parameters": parameter_payload(params),
            "result": _threshold_result_payload(result),
        }

    if mode == "search":
        candidate_values = _candidate_t_values(config)
        result = search_threshold_over_candidates(
            params=params,
            candidate_T_values=candidate_values,
            runs=config.runs,
            seed_start=config.seed,
            cache_path=config.cache_path,
            jobs=config.jobs,
        )
        return {
            "mode": "search",
            "parameters": parameter_payload(params),
            "candidates": candidate_values,
            "result": None if result is None else _threshold_result_payload(result),
        }

    if mode == "loci_regression":
        candidate_values = _candidate_t_values(config)
        loci_values = config.loci_values if config.loci_values is not None else [1, 2, 3, 4, 5, 6, 7]
        sweep = sweep_number_of_loci(
            params=params,
            loci_values=loci_values,
            candidate_T_values=candidate_values,
            runs=config.runs,
            seed_start=config.seed,
            cache_path=config.cache_path,
            jobs=config.jobs,
        )
        return {
            "mode": "loci_regression",
            "parameters": parameter_payload(params),
            "loci_values": loci_values,
            "candidates": candidate_values,
            "rows": [asdict(row) for row in sweep.rows],
            "fit": None if sweep.fit is None else asdict(sweep.fit),
        }

    raise ValueError(f"Unsupported Track 1 mode: {config.mode}")