diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..23ba550 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,32 @@ +version: "3.9" +services: +rust-dev: +build: +context: . +dockerfile: docker/Dockerfile.rust-dev +volumes: +- ./:/work +working_dir: /work +command: bash -lc "cargo test --release" + + +wasm: +build: +context: . +dockerfile: docker/Dockerfile.wasm +volumes: +- ./:/work +working_dir: /work +command: bash -lc "make wasm && python3 -m http.server -d bindings/wasm/pkg 8000" +ports: +- "8000:8000" + + +python: +build: +context: . +dockerfile: docker/Dockerfile.rust-dev +volumes: +- ./:/work +working_dir: /work +command: bash -lc "make py && python -c 'from symreg_rs import PySymbolicRegressor as SR; print(SR().fit_predict([[0.0],[1.0]],[0.0,1.0]))'" diff --git a/.gitignore b/.gitignore index da7e412..6504a95 100644 --- a/.gitignore +++ b/.gitignore @@ -1,229 +1,10 @@ -# ---> Rust -# Generated by Cargo -# will have compiled files and executables -debug/ target/ - -# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries -# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html -Cargo.lock - -# These are backup files generated by rustfmt -**/*.rs.bk - -# MSVC Windows builds of rustc generate these, which store debugging information -*.pdb - -# ---> Emacs -# -*- mode: gitignore; -*- -*~ -\#*\# -/.emacs.desktop -/.emacs.desktop.lock -*.elc -auto-save-list -tramp -.\#* - -# Org-mode -.org-id-locations -*_archive - -# flymake-mode -*_flymake.* - -# eshell files -/eshell/history -/eshell/lastdir - -# elpa packages -/elpa/ - -# reftex files -*.rel - -# AUCTeX auto folder -/auto/ - -# cask packages -.cask/ -dist/ - -# Flycheck -flycheck_*.el - -# server auth directory -/server/ - -# projectiles files -.projectile - -# directory configuration -.dir-locals.el - -# network security -/network-security.data - - -# ---> Python -# Byte-compiled / 
optimized / DLL files +**/*.pyc __pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. 
-# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/#use-with-ide -.pdm.toml - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
-#.idea/ - +*.pyo +.vscode/ +.idea/ +.DS_Store +bindings/python/target/ +bindings/python/*.egg-info/ +bindings/wasm/pkg/ diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..1dd11e9 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,55 @@ +[package] +name = "symreg-rs" +version = "0.1.0" +edition = "2021" +license = "Apache-2.0" +description = "Fast, extensible symbolic regression in Rust with Python bindings and WASM target" +repository = "https://example.org/your/repo" + + +[lib] +name = "symreg_rs" +crate-type = ["rlib", "cdylib"] + + +[features] +default = ["simd"] +simd = [] +python = ["pyo3/extension-module", "pyo3/macros"] +wasm = ["wasm-bindgen"] + + +[dependencies] +rand = "0.8" +rand_distr = "0.4" +smallvec = "1" +bitflags = "2" +thiserror = "1" +serde = { version = "1", features = ["derive"] } +serde_json = "1" +rayon = "1" +ordered-float = "4" + + +# algebra & optimization +nalgebra = { version = "0.33", default-features = false, features=["std"] } +argmin = { version = "0.8", optional = true } + + +# optional rewriting via e-graphs +egg = { version = "0.9", optional = true } + + +# bindings +pyo3 = { version = "0.21", optional = true, features=["abi3", "abi3-py38"] } +wasm-bindgen = { version = "0.2", optional = true } + + +[dev-dependencies] +criterion = "0.5" + + +[profile.release] +lto = true +codegen-units = 1 +opt-level = 3 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..bc04d86 --- /dev/null +++ b/Makefile @@ -0,0 +1,38 @@ +.PHONY: build test bench fmt lint py sdist wheel wasm demo + + +build: +cargo build --release + + +test: +cargo test --all --release + + +bench: +cargo bench + + +fmt: +cargo fmt --all +cargo clippy --all-targets --all-features -- -D warnings + + +py: +maturin develop -m bindings/python/pyproject.toml + + +sdist: +maturin sdist -m bindings/python/pyproject.toml + + +wheel: +maturin build -m bindings/python/pyproject.toml --release + + +wasm: +wasm-pack build bindings/wasm --target web --release 
+ + +demo: +python3 -m http.server -d bindings/wasm/pkg 8000 diff --git a/README.md b/README.md index 2e0a05d..c3d3a23 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,68 @@ # sr-rs SR-RS: A portable symbolic regression engine in Rust, patterned after Cranmer's SymbolicRegression.jl. + +**Rust symbolic regression** with: + + +- 🦀 **Core in Rust** (fast evaluators, SIMD where available) +- 🐍 **Python bindings** (PyO3, scikit-learn-style API) +- 🌐 **WASM target** (`wasm-bindgen`, in-browser demo) +- ♻️ **Rewrite-based simplification** (optional `egg` integration) +- 🎯 Multi-objective search (error vs. complexity), constant-fitting, protected operators, and export to SymPy/LaTeX + + +## Quick start + + +### Native (Rust) +```bash +cargo run --example quickstart + + +Python +# inside container or host with Rust toolchain +make py +python -c "from symreg_rs import SymbolicRegressor; print(SymbolicRegressor().fit_predict([[0.0],[1.0]], [0.0,1.0]))" +WebAssembly demo +make wasm && make demo +# open http://localhost:8000/index.html +Design highlights + +Expression AST with typed operations and arity checks + +Fast vectorized evaluator (feature simd) + +GP loop with tournament selection, variation (mutate/crossover), and Pareto archiving + +Constant optimization via LM/Argmin (optional) + +Simplification via identities + (optional) egg equality saturation + +See docs/DESIGN.md and docs/PORTING.md for details and porting guidance. + +9) Usage snippets +Rust +use symreg_rs::{GpConfig, SymbolicRegressor}; +let x = vec![vec![0.0], vec![1.0]]; +let y = vec![0.0, 1.0]; +let sr = SymbolicRegressor::default(); +let expr = sr.fit(&x, &y); +Python +from symreg_rs import PySymbolicRegressor as SR +sr = SR(pop_size=256, gens=20, num_vars=1) +preds = sr.fit_predict([[0.0],[1.0]],[0.0,1.0]) +10) Next steps (recommended) + +Wire up constant fitting (Argmin LM) in fitness.rs. + +Add exporters: to SymPy string and LaTeX. + +Implement subtree crossover/mutation and depth/size limits. 
+ +Add vectorized evaluator (std::simd) + rayon parallelism. + +Optional: integrate egg for global simplification. + +This scaffold should compile with minor tweaks (crate names/versions). From here, we can iterate on the porting branch to reach feature parity with your preferred SR baseline. + diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml new file mode 100644 index 0000000..7b657c1 --- /dev/null +++ b/bindings/python/pyproject.toml @@ -0,0 +1,20 @@ +[build-system] +requires = ["maturin>=1.4"] +build-backend = "maturin" + + +[project] +name = "symreg-rs" +version = "0.1.0" +description = "Rust symbolic regression with Python bindings" +authors = [{name="Your Name", email="you@example.com"}] +requires-python = ">=3.8" +classifiers = [ +"Programming Language :: Python :: 3", +"Programming Language :: Rust", +] + + +[tool.maturin] +features = ["python"] +module-name = "symreg_rs" diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs new file mode 100644 index 0000000..fd19eda --- /dev/null +++ b/bindings/python/src/lib.rs @@ -0,0 +1,33 @@ +use pyo3::prelude::*; +use symreg_rs::{GpConfig, SymbolicRegressor}; + + +#[pyclass] +struct PySymbolicRegressor { inner: SymbolicRegressor } + + +#[pymethods] +impl PySymbolicRegressor { +#[new] +fn new(pop_size: Option, gens: Option, num_vars: Option) -> Self { +let mut cfg = GpConfig::default(); +if let Some(v) = pop_size { cfg.pop_size = v; } +if let Some(v) = gens { cfg.gens = v; } +if let Some(v) = num_vars { cfg.num_vars = v; } +Self { inner: SymbolicRegressor::new(cfg) } +} + + +fn fit_predict(&self, x: Vec>, y: Vec, predict_x: Option>>) -> PyResult> { +let expr = self.inner.fit(&x, &y); +let px = predict_x.unwrap_or(x); +Ok(px.iter().map(|row| symreg_rs::eval::eval_expr(&expr, row)).collect()) +} +} + + +#[pymodule] +fn symreg_rs(_py: Python, m: &PyModule) -> PyResult<()> { +m.add_class::()?; +Ok(()) +} diff --git a/bindings/wasm/index.html b/bindings/wasm/index.html new file 
mode 100644
index 0000000..2b8e08e
--- /dev/null
+++ b/bindings/wasm/index.html
@@ -0,0 +1,19 @@
+<!doctype html>
+<html>
+<head>
+<meta charset="utf-8" />
+<title>symreg-rs WASM demo</title>
+</head>
+<body>
+<h1>symreg-rs (WASM)</h1>
+<pre id="out">loading…</pre>
+<script type="module">
+import init, { fit_mse } from './symreg_rs.js';
+await init();
+const x = [0, 0.5, 1, 1.5, 2, 2.5];
+const y = x.map(Math.sin);
+const preds = fit_mse(new Float64Array(x), x.length, 1, new Float64Array(y), 10);
+document.getElementById('out').textContent = JSON.stringify(Array.from(preds));
+</script>
+</body>
+</html>
diff --git a/bindings/wasm/package.json b/bindings/wasm/package.json
new file mode 100644
index 0000000..a150cd8
--- /dev/null
+++ b/bindings/wasm/package.json
@@ -0,0 +1,8 @@
+{
+"name": "symreg-rs-wasm",
+"version": "0.1.0",
+"scripts": {
+"build": "wasm-pack build --target web --release",
+"serve": "python3 -m http.server -d pkg 8000"
+}
+}
diff --git a/bindings/wasm/src/lib.rs b/bindings/wasm/src/lib.rs
new file mode 100644
index 0000000..b55cce5
--- /dev/null
+++ b/bindings/wasm/src/lib.rs
@@ -0,0 +1,15 @@
+use wasm_bindgen::prelude::*;
+use symreg_rs::{GpConfig, SymbolicRegressor};
+
+
+#[wasm_bindgen]
+pub fn fit_mse(x: Vec<f64>, n_rows: usize, n_cols: usize, y: Vec<f64>, gens: usize) -> Vec<f64> {
+let mut rows = Vec::with_capacity(n_rows);
+for r in 0..n_rows {
+let start = r*n_cols; rows.push(x[start..start+n_cols].to_vec());
+}
+let cfg = GpConfig { gens, num_vars: n_cols, ..Default::default() };
+let sr = SymbolicRegressor::new(cfg);
+let expr = sr.fit(&rows, &y);
+rows.iter().map(|row| symreg_rs::eval::eval_expr(&expr, row)).collect()
+}
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..23ba550
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,32 @@
+version: "3.9"
+services:
+rust-dev:
+build:
+context: .
+dockerfile: docker/Dockerfile.rust-dev
+volumes:
+- ./:/work
+working_dir: /work
+command: bash -lc "cargo test --release"
+
+
+wasm:
+build:
+context: .
+dockerfile: docker/Dockerfile.wasm
+volumes:
+- ./:/work
+working_dir: /work
+command: bash -lc "make wasm && python3 -m http.server -d bindings/wasm/pkg 8000"
+ports:
+- "8000:8000"
+
+
+python:
+build:
+context: .
+dockerfile: docker/Dockerfile.rust-dev
+volumes:
+- ./:/work
+working_dir: /work
+command: bash -lc "make py && python -c 'from symreg_rs import PySymbolicRegressor as SR; print(SR().fit_predict([[0.0],[1.0]],[0.0,1.0]))'"
diff --git a/docker/Dockerfile.python b/docker/Dockerfile.python
new file mode 100644
index 0000000..18414d5
--- /dev/null
+++ b/docker/Dockerfile.python
@@ -0,0 +1,5 @@
+FROM python:3.11-slim
+RUN apt-get update && apt-get install -y build-essential python3-dev && rm -rf /var/lib/apt/lists/*
+# wheel build happens via maturin in rust-dev; this image is for runtime
+RUN pip install numpy
+WORKDIR /app
diff --git a/docker/Dockerfile.rust-dev b/docker/Dockerfile.rust-dev
new file mode 100644
index 0000000..0b120f0
--- /dev/null
+++ b/docker/Dockerfile.rust-dev
@@ -0,0 +1,4 @@
+FROM rust:1.80
+RUN apt-get update && apt-get install -y python3 python3-pip pkg-config libssl-dev && rm -rf /var/lib/apt/lists/*
+RUN pip3 install --break-system-packages maturin && cargo install wasm-pack
+WORKDIR /work
diff --git a/docker/Dockerfile.wasm b/docker/Dockerfile.wasm
new file mode 100644
index 0000000..41dd02a
--- /dev/null
+++ b/docker/Dockerfile.wasm
@@ -0,0 +1,3 @@
+FROM rust:1.80
+RUN cargo install wasm-pack
+WORKDIR /work
diff --git a/docker/entrypoints/python-build.sh b/docker/entrypoints/python-build.sh
new file mode 100644
index 0000000..16897f2
--- /dev/null
+++ b/docker/entrypoints/python-build.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+set -euo pipefail
+make wheel
+exec "$@"
diff --git a/docker/entrypoints/rust-dev.sh b/docker/entrypoints/rust-dev.sh
new file mode 100644
index 0000000..a45dc6a
--- /dev/null
+++ b/docker/entrypoints/rust-dev.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+set -euo pipefail
+cargo build --release
+exec "$@"
diff --git a/docs/BENCHMARK.md b/docs/BENCHMARK.md
new file mode 100644
index 0000000..db1f150
--- /dev/null
+++ b/docs/BENCHMARK.md
@@ -0,0 +1,30 @@
+# BENCHMARKS
+
+
+- Use `criterion` for microbenchmarks (eval throughput).
+- Dataset-level timing: fit to Friedman1 and report wall-clock and MSE.
+- Compare feature toggles: scalar vs SIMD, parallel vs single-thread.
+
+
+
+---
+
+
+## Branching model (with a `porting` branch)
+
+
+- `main`: stable, tagged releases.
+- `develop`: integration of ready features.
+- `porting`: **all work specific to parity with SymbolicRegression.jl** (feature-by-feature ports, benchmarks, notes). Merge into `develop` via PRs when each slice reaches MVP.
+- Per-feature branches from `porting` (e.g., `porting/const-fitting`, `porting/egg-rewrites`).
+
+
+**Initial commands:**
+```bash
+git init
+git checkout -b main
+git add . && git commit -m "feat: repo scaffold"
+git checkout -b develop
+git checkout -b porting
+```
+
diff --git a/docs/DESIGN.md b/docs/DESIGN.md
new file mode 100644
index 0000000..51174be
--- /dev/null
+++ b/docs/DESIGN.md
@@ -0,0 +1,25 @@
+# DESIGN
+
+
+## Goals
+- Fast core with predictable performance (no runtime JIT)
+- Clean APIs for Python and Web
+- Extensibility: pluggable operators, fitness terms, search strategies
+
+
+## Key components
+- AST (`ast.rs`), evaluator (`eval.rs`), operators (`ops.rs`)
+- Fitness metrics (`fitness.rs`), GP algorithm (`gp.rs`)
+- Simplification (`simplify/`), utilities (`utils.rs`)
+
+
+## Error/Complexity tradeoff
+- Use bi-objective selection or scalarized cost; Pareto archive maintained per generation.
+
+
+## Safety
+- Protected operators for division/log; numeric guards where needed.
+
+
+## Determinism
+- Seeded RNG exposed in configs for reproducible runs.
diff --git a/docs/PORTING.md b/docs/PORTING.md
new file mode 100644
index 0000000..8f2e876
--- /dev/null
+++ b/docs/PORTING.md
@@ -0,0 +1,42 @@
+# Porting plan (from SymbolicRegression.jl concepts)
+
+
+This document maps major features/design choices to Rust equivalents and notes feasibility.
+
+
+## Search & representation
+- **Representation:** tree/graph AST with typed ops. Rust enums for ops and nodes. Feasible (done in MVP).
+- **Search:** GP with tournament selection + crossover/mutation, plus Pareto archiving. Extensible to age-fitness, lexicase.
+- **Constants:** LM/variable projection via `argmin`/`nalgebra`. Start with LM; later add autodiff for local gradients.
+
+
+## Evaluation performance
+- **Julia’s fused loops & SIMD:** replicate with `std::simd` and hand-rolled evaluators. Bench and specialize hot ops.
+- **Parallelism:** use `rayon` for population eval; thread support optional in WASM (SharedArrayBuffer route).
+
+
+## Simplification & pruning
+- **Local rules:** identities (e.g., `x+0→x`, `x*1→x`, `sin(0)→0`).
+- **Global rewriting:** integrate `egg` as optional feature for equality saturation + cost-based extraction.
+
+
+## Multi-objective
+- Maintain Pareto front on (error, complexity). Provide knobs for complexity penalty and max size/depth.
+
+
+## Export & interop
+- **Python:** PyO3 module `symreg_rs` with sklearn-like API (`fit`, `predict`, `score`).
+- **WASM:** thin bindgen façade for browser demos; avoid heavy deps.
+- **SymPy/LaTeX:** stringifier to SymPy code, plus LaTeX pretty-printing.
+
+
+## Test & benchmark
+- Reuse standard SRBench datasets where licensing permits; include Friedman1 synthetic.
+- Add criterion benches for evaluator and variation operators.
+
+
+## Milestones
+1. MVP (this scaffold): scalar eval, GP loop, Python & WASM hello world.
+2. Vectorized eval + `rayon` parallel pop eval; constant fitting.
+3. Simplifier + export; sklearn-compatible estimator.
+4. E-graph integration; advanced search strategies; docs + notebooks.
diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md
new file mode 100644
index 0000000..4c0cc9e
--- /dev/null
+++ b/docs/ROADMAP.md
@@ -0,0 +1,11 @@
+# ROADMAP
+
+
+- [ ] SIMD evaluator
+- [ ] Parallel pop evaluation (rayon)
+- [ ] LM constant fitting
+- [ ] SymPy/LaTeX exporters
+- [ ] sklearn API parity (fit/predict/score, get_params/set_params)
+- [ ] E-graph simplification (egg)
+- [ ] Web demo with worker + shared memory
+- [ ] SRBench harness integration
diff --git a/examples/quickstart.rs b/examples/quickstart.rs
new file mode 100644
index 0000000..c72758f
--- /dev/null
+++ b/examples/quickstart.rs
@@ -0,0 +1,11 @@
+use symreg_rs::{GpConfig, SymbolicRegressor};
+
+
+fn main() {
+let x: Vec<Vec<f64>> = (0..50).map(|i| vec![i as f64 / 10.0]).collect();
+let y: Vec<f64> = x.iter().map(|v| v[0].sin()).collect();
+let cfg = GpConfig { num_vars: 1, gens: 10, ..Default::default() };
+let sr = SymbolicRegressor::new(cfg);
+let expr = sr.fit(&x, &y);
+println!("best expr nodes={}, root={}", expr.nodes.len(), expr.root);
+}
diff --git a/src/ast.rs b/src/ast.rs
new file mode 100644
index 0000000..2835f46
--- /dev/null
+++ b/src/ast.rs
@@ -0,0 +1,34 @@
+use serde::{Deserialize, Serialize};
+
+
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
+pub enum Op {
+Add, Sub, Mul, Div, Sin, Cos, Exp, Log,
+// extendable: Tan, Abs, Pow, etc.
+}
+
+
+impl Op {
+pub fn arity(&self) -> usize {
+use Op::*;
+match self {
+Add | Sub | Mul | Div => 2,
+Sin | Cos | Exp | Log => 1,
+}
+}
+}
+
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum Node {
+Var(usize),
+Const(f64),
+Call(Op, smallvec::SmallVec<[usize; 4]>), // children indices
+}
+
+
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct Expr {
+pub nodes: Vec<Node>,
+pub root: usize,
+}
diff --git a/src/eval.rs b/src/eval.rs
new file mode 100644
index 0000000..d4c7e78
--- /dev/null
+++ b/src/eval.rs
@@ -0,0 +1,25 @@
+use crate::ast::{Expr, Node, Op};
+use crate::ops::{apply_unary, protected_div};
+
+
+pub fn eval_expr(expr: &Expr, x: &[f64]) -> f64 {
+// simple stack-based evaluator (scalar); SIMD/vectorized path can be added
+let mut cache: Vec<f64> = vec![0.0; expr.nodes.len()];
+for (i, node) in expr.nodes.iter().enumerate() {
+let val = match node {
+Node::Var(k) => x[*k],
+Node::Const(c) => *c,
+Node::Call(op, ch) => {
+match op {
+Op::Add => cache[ch[0]] + cache[ch[1]],
+Op::Sub => cache[ch[0]] - cache[ch[1]],
+Op::Mul => cache[ch[0]] * cache[ch[1]],
+Op::Div => protected_div(cache[ch[0]], cache[ch[1]]),
+_ => apply_unary(*op, cache[ch[0]]),
+}
+}
+};
+cache[i] = val;
+}
+cache[expr.root]
+}
diff --git a/src/fitness.rs b/src/fitness.rs
new file mode 100644
index 0000000..a00cb78
--- /dev/null
+++ b/src/fitness.rs
@@ -0,0 +1,17 @@
+use crate::ast::Expr;
+use crate::eval::eval_expr;
+
+
+pub fn mse(expr: &Expr, x: &[Vec<f64>], y: &[f64]) -> f64 {
+let n = x.len();
+let mut s = 0.0;
+for i in 0..n {
+let yi = eval_expr(expr, &x[i]);
+let d = yi - y[i];
+s += d * d;
+}
+s / n as f64
+}
+
+
+pub fn complexity(expr: &Expr) -> usize { expr.nodes.len() }
diff --git a/src/gp.rs b/src/gp.rs
new file mode 100644
index 0000000..1b770ff
--- /dev/null
+++ b/src/gp.rs
@@ -0,0 +1,169 @@
+use rand::{seq::SliceRandom, Rng};
+use serde::{Deserialize, Serialize};
+
+
+use crate::ast::{Expr, Node, Op};
+use crate::fitness::{complexity, mse};
+
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct GpConfig {
+pub pop_size: usize,
+pub max_depth: usize,
+pub mutation_rate: f64,
+pub crossover_rate: f64,
+pub gens: usize,
+pub num_vars: usize,
+}
+
+
+impl Default for GpConfig {
+fn default() -> Self {
+Self { pop_size: 256, max_depth: 6, mutation_rate: 0.2, crossover_rate: 0.8, gens: 50, num_vars: 1 }
+}
+}
+
+
+#[derive(Debug, Clone)]
+pub struct Individual { pub expr: Expr, pub err: f64, pub complx: usize }
+
+
+pub struct SymbolicRegressor { pub cfg: GpConfig }
+
+
+impl Default for SymbolicRegressor { fn default() -> Self { Self { cfg: GpConfig::default() } } }
+
+
+impl SymbolicRegressor {
+pub fn new(cfg: GpConfig) -> Self { Self { cfg } }
+
+
+pub fn fit(&self, x: &[Vec<f64>], y: &[f64]) -> Expr {
+let mut rng = rand::thread_rng();
+let mut pop: Vec<Individual> = (0..self.cfg.pop_size)
+.map(|_| Individual { expr: rand_expr(self.cfg.num_vars, self.cfg.max_depth, &mut rng), err: f64::INFINITY, complx: 0 })
+.collect();
+eval_pop(&mut pop, x, y);
+for _gen in 0..self.cfg.gens {
+pop.sort_by(|a,b| a.err.partial_cmp(&b.err).unwrap());
+let elite = pop[0].clone();
+let mut next = vec![elite];
+while next.len() < pop.len() {
+if rng.gen::<f64>() < self.cfg.crossover_rate {
+let (a,b) = tournament2(&pop, &mut rng);
+next.push(crossover(&a.expr, &b.expr, &mut rng));
+} else if rng.gen::<f64>() < self.cfg.mutation_rate {
+let a = &pop[rng.gen_range(0..pop.len())];
+next.push(mutate(&a.expr, self.cfg.num_vars, self.cfg.max_depth, &mut rng));
+} else {
+next.push(pop[rng.gen_range(0..pop.len())].clone());
+}
+}
+pop = next;
+eval_pop(&mut pop, x, y);
+}
+pop.sort_by(|a,b| a.err.partial_cmp(&b.err).unwrap());
+pop[0].expr.clone()
+}
+}
+
+
+// Evaluate fitness for the whole population; non-finite errors are clamped to
+// INFINITY so the partial_cmp sorts in fit() never see NaN and panic.
+fn eval_pop(pop: &mut [Individual], x: &[Vec<f64>], y: &[f64]) {
+pop.iter_mut().for_each(|ind| {
+let e = mse(&ind.expr, x, y);
+ind.err = if e.is_finite() { e } else { f64::INFINITY };
+ind.complx = complexity(&ind.expr);
+});
+}
+
+
+// Random tree generation. Children are pushed before their parent so the
+// index-ordered evaluator in eval.rs always sees operands before a Call node.
+fn rand_expr(num_vars: usize, max_depth: usize, rng: &mut impl Rng) -> Expr {
+let mut nodes = Vec::new();
+let root = grow(&mut nodes, num_vars, max_depth, rng);
+Expr { nodes, root }
+}
+
+
+fn grow(nodes: &mut Vec<Node>, num_vars: usize, depth: usize, rng: &mut impl Rng) -> usize {
+if depth == 0 || rng.gen::<f64>() < 0.3 {
+let leaf = if rng.gen::<bool>() { Node::Var(rng.gen_range(0..num_vars)) } else { Node::Const(rng.gen_range(-1.0..1.0)) };
+nodes.push(leaf);
+return nodes.len() - 1;
+}
+const OPS: [Op; 8] = [Op::Add, Op::Sub, Op::Mul, Op::Div, Op::Sin, Op::Cos, Op::Exp, Op::Log];
+let op = *OPS.choose(rng).unwrap();
+let mut ch = smallvec::SmallVec::<[usize; 4]>::new();
+for _ in 0..op.arity() { ch.push(grow(nodes, num_vars, depth - 1, rng)); }
+nodes.push(Node::Call(op, ch));
+nodes.len() - 1
+}
+
+
+// Binary tournament: lower error wins.
+fn tournament<'a>(pop: &'a [Individual], rng: &mut impl Rng) -> &'a Individual {
+let a = &pop[rng.gen_range(0..pop.len())];
+let b = &pop[rng.gen_range(0..pop.len())];
+if a.err <= b.err { a } else { b }
+}
+
+
+fn tournament2<'a>(pop: &'a [Individual], rng: &mut impl Rng) -> (&'a Individual, &'a Individual) {
+(tournament(pop, rng), tournament(pop, rng))
+}
+
+
+// Deep-copy the subtree of `src` rooted at `idx` into `dst`; returns the new root index.
+fn copy_subtree(src: &Expr, idx: usize, dst: &mut Vec<Node>) -> usize {
+let node = match &src.nodes[idx] {
+Node::Var(k) => Node::Var(*k),
+Node::Const(c) => Node::Const(*c),
+Node::Call(op, ch) => {
+let mut nch = smallvec::SmallVec::<[usize; 4]>::new();
+for &c in ch.iter() { nch.push(copy_subtree(src, c, dst)); }
+Node::Call(*op, nch)
+}
+};
+dst.push(node);
+dst.len() - 1
+}
+
+
+// Copy `a`, substituting the subtree rooted at `cut` with the subtree of `b` rooted at `donor`.
+fn graft(a: &Expr, idx: usize, cut: usize, b: &Expr, donor: usize, dst: &mut Vec<Node>) -> usize {
+if idx == cut { return copy_subtree(b, donor, dst); }
+let node = match &a.nodes[idx] {
+Node::Var(k) => Node::Var(*k),
+Node::Const(c) => Node::Const(*c),
+Node::Call(op, ch) => {
+let mut nch = smallvec::SmallVec::<[usize; 4]>::new();
+for &c in ch.iter() { nch.push(graft(a, c, cut, b, donor, dst)); }
+Node::Call(*op, nch)
+}
+};
+dst.push(node);
+dst.len() - 1
+}
+
+
+// Subtree crossover: a random cut point in `a` receives a random subtree of `b`.
+fn crossover(a: &Expr, b: &Expr, rng: &mut impl Rng) -> Individual {
+let cut = rng.gen_range(0..a.nodes.len());
+let donor = rng.gen_range(0..b.nodes.len());
+let mut nodes = Vec::new();
+let root = graft(a, a.root, cut, b, donor, &mut nodes);
+Individual { expr: Expr { nodes, root }, err: f64::INFINITY, complx: 0 }
+}
+
+
+// Subtree mutation: replace a random subtree with a freshly grown small tree.
+fn mutate(a: &Expr, num_vars: usize, max_depth: usize, rng: &mut impl Rng) -> Individual {
+let cut = rng.gen_range(0..a.nodes.len());
+let donor = rand_expr(num_vars, max_depth.min(2), rng);
+let mut nodes = Vec::new();
+let root = graft(a, a.root, cut, &donor, donor.root, &mut nodes);
+Individual { expr: Expr { nodes, root }, err: f64::INFINITY, complx: 0 }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..26bee8a
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,10 @@
+pub mod ast;
+pub mod eval;
+pub mod ops;
+pub mod fitness;
+pub mod gp;
+pub mod simplify;
+pub mod utils;
+
+
+pub use crate::gp::{GpConfig, SymbolicRegressor};
diff --git a/src/ops.rs b/src/ops.rs
new file mode 100644
index 0000000..4aa43d6
--- /dev/null
+++ b/src/ops.rs
@@ -0,0 +1,17 @@
+use crate::ast::Op;
+
+
+pub fn protected_div(a: f64, b: f64) -> f64 {
+if b.abs() < 1e-12 { a } else { a / b }
+}
+
+
+pub fn apply_unary(op: Op, x: f64) -> f64 {
+match op {
+Op::Sin => x.sin(),
+Op::Cos => x.cos(),
+Op::Exp => x.exp(),
+Op::Log => if x.abs() < 1e-12 { 0.0 } else { x.abs().ln() },
+_ => unreachable!("not unary"),
+}
+}
diff --git a/src/simplify/mod.rs b/src/simplify/mod.rs
new file mode 100644
index 0000000..a4b5b6c
--- /dev/null
+++ b/src/simplify/mod.rs
@@ -0,0 +1 @@
+pub mod rules;
diff --git a/src/simplify/rules.rs b/src/simplify/rules.rs
new file mode 100644
index 0000000..dd01823
--- /dev/null
+++ b/src/simplify/rules.rs
@@ -0,0 +1 @@
+// placeholder for local simplification rules; egg-based rules can live in crates/egg-rewrites