Adding LLM evaluation code for faster processing.

2025-09-20 20:33:55 -04:00 · 2025-09-20 20:33:55 -04:00 · 4564f53577
parent 67b9c88cba
commit 4564f53577
8 changed files with 982 additions and 0 deletions
--- a/alice_fast/init.py
+++ b/alice_fast/init.py
@ -0,0 +1,5 @@
+# ALICE — fast batched kernels (Step 1+)
+from .kernels import PASS, PEEK, EAT, IDLE
+from .batched_belt import BatchedBelt
+
+__all__ = ["PASS", "PEEK", "EAT", "IDLE", "BatchedBelt"]
--- a/alice_fast/batched_belt.py
+++ b/alice_fast/batched_belt.py
@ -0,0 +1,79 @@
+from __future__ import annotations
+import numpy as np
+from .kernels import (
+    epsilon_greedy_batch,
+    step_transition_batch,
+    reward_batch,
+    q_learning_update_batch,
+)
+
+class BatchedBelt:
+    """
+    Homogeneous batch of puzzle boxes sharing the same (S, A) and transition table.
+    For Step 1 speedups, we assume a shared reward table; heterogeneity can be layered later.
+
+    Non-advancing PEEK via augmented state:
+      - Model two states per puzzle: 0=unpeeked, 1=peeked.
+      - Set transition_table[unpeeked, PEEK] = peeked, and transition_table[peeked, PEEK] = peeked.
+      - Keep PASS/EAT semantics as desired.
+    """
+    def __init__(self,
+                 n_states: int,
+                 n_actions: int,
+                 transition_table: np.ndarray,   # [S, A] -> next_state
+                 reward_table: np.ndarray,       # [S, A, S] -> reward
+                 base_action_costs: np.ndarray,  # [A]
+                 batch_size: int,
+                 gamma: float = 0.97,
+                 alpha: float = 0.2,
+                 epsilon: float = 0.05,
+                 seed: int = 1234):
+        self.S, self.A, self.B = int(n_states), int(n_actions), int(batch_size)
+        self.tt = transition_table.astype(np.int32, copy=False)
+        self.rt = reward_table.astype(np.float32, copy=False)
+        self.base_costs = base_action_costs.astype(np.float32, copy=False)
+        self.gamma, self.alpha, self.epsilon = float(gamma), float(alpha), float(epsilon)
+
+        self.rng = np.random.default_rng(seed)
+        self.states = np.zeros(self.B, dtype=np.int32)  # default start state 0 (unpeeked)
+        self.q = np.zeros((self.S, self.A), dtype=np.float32)
+
+        # Preallocated buffers (avoid per-step allocations)
+        self._u = np.empty(self.B, dtype=np.float64)         # explore vs exploit
+        self._rand_actions = np.empty(self.B, dtype=np.int32)
+        self._actions = np.empty(self.B, dtype=np.int32)
+        self._next_states = np.empty(self.B, dtype=np.int32)
+        self._rewards = np.empty(self.B, dtype=np.float32)
+        self._terminal_mask = np.zeros(self.S, dtype=np.bool_)
+
+    def reset_states(self, start_state: int = 0):
+        self.states.fill(start_state)
+
+    def step_learn(self):
+        """
+        One batched interaction + Q update:
+          - ε-greedy actions from Q(s,·)
+          - transition
+          - reward
+          - TD(0) update
+        """
+        # Pre-generate randomness without Python loops
+        self._u[:] = self.rng.random(self.B)
+        self._rand_actions[:] = self.rng.integers(0, self.A, size=self.B, dtype=np.int32)
+
+        q_s = self.q[self.states]  # view: [B, A]
+        self._actions[:] = epsilon_greedy_batch(q_s, self.epsilon, self._u, self._rand_actions)
+
+        self._next_states[:] = step_transition_batch(self.states, self._actions, self.tt, self._terminal_mask)
+        self._rewards[:] = reward_batch(self.states, self._actions, self._next_states, self.rt, self.base_costs)
+
+        q_learning_update_batch(self.q, self.states, self._actions, self._rewards, self._next_states,
+                                self.alpha, self.gamma)
+
+        self.states[:] = self._next_states
+        return {
+            "actions": self._actions.copy(),
+            "rewards": self._rewards.copy(),
+            "states": self.states.copy(),
+        }
+
--- a/alice_fast/kernels.py
+++ b/alice_fast/kernels.py
@ -0,0 +1,103 @@
+from __future__ import annotations
+import numpy as np
+from numba import njit, prange
+
+# Canonical action indices; keep aligned with your environment
+PASS, PEEK, EAT, IDLE = 0, 1, 2, 3
+
+
+@njit(cache=True, fastmath=False)
+def epsilon_greedy_batch(q_values: np.ndarray,
+                         epsilon: float,
+                         rng_uniform: np.ndarray,       # [B] in [0,1)
+                         rng_actions: np.ndarray) -> np.ndarray:  # [B] ints (unbounded)
+    """
+    Batch ε-greedy over Q(s, a).
+    q_values: [B, A] Q-values for each batch element
+    rng_uniform: [B] pre-generated U(0,1) for branch
+    rng_actions: [B] pre-generated ints for unbiased random actions
+    returns actions [B]
+    """
+    B, A = q_values.shape
+    actions = np.empty(B, dtype=np.int32)
+    for i in range(B):
+        if rng_uniform[i] < epsilon:
+            actions[i] = rng_actions[i] % A  # unbiased random pick in [0, A)
+        else:
+            best_a = 0
+            best_q = q_values[i, 0]
+            for a in range(1, A):
+                q = q_values[i, a]
+                if q > best_q:
+                    best_q = q
+                    best_a = a
+            actions[i] = best_a
+    return actions
+
+
+@njit(cache=True, fastmath=False, parallel=True)
+def step_transition_batch(states: np.ndarray,
+                          actions: np.ndarray,
+                          tt: np.ndarray,
+                          terminal_mask: np.ndarray) -> np.ndarray:
+    """
+    Fast FSM transition:
+    states: [B], actions: [B]
+    tt: [S, A] -> next_state
+    terminal_mask: [S] (kept for future terminal logic; unused here)
+    """
+    B = states.shape[0]
+    next_states = np.empty_like(states)
+    for i in prange(B):
+        s = states[i]
+        a = actions[i]
+        ns = tt[s, a]
+        next_states[i] = ns
+    return next_states
+
+
+@njit(cache=True, fastmath=False, parallel=True)
+def reward_batch(states: np.ndarray,
+                 actions: np.ndarray,
+                 next_states: np.ndarray,
+                 reward_table: np.ndarray,
+                 base_action_costs: np.ndarray) -> np.ndarray:
+    """
+    Reward lookup with per-(s,a,ns) extrinsic reward + base action costs.
+    reward_table: [S, A, S]
+    base_action_costs: [A]
+    """
+    B = states.shape[0]
+    r = np.empty(B, dtype=np.float32)
+    for i in prange(B):
+        r[i] = reward_table[states[i], actions[i], next_states[i]] + base_action_costs[actions[i]]
+    return r
+
+
+@njit(cache=True, fastmath=False, parallel=True)
+def q_learning_update_batch(q_values: np.ndarray,
+                            states: np.ndarray,
+                            actions: np.ndarray,
+                            rewards: np.ndarray,
+                            next_states: np.ndarray,
+                            alpha: float,
+                            gamma: float) -> None:
+    """
+    In-place TD(0)/Q-learning update over a batch.
+    q_values: [S, A]
+    """
+    B = states.shape[0]
+    A = q_values.shape[1]
+    for i in prange(B):
+        s = states[i]
+        a = actions[i]
+        ns = next_states[i]
+        # max_a' Q(ns, a')
+        max_q = q_values[ns, 0]
+        for ap in range(1, A):
+            if q_values[ns, ap] > max_q:
+                max_q = q_values[ns, ap]
+        td_target = rewards[i] + gamma * max_q
+        td_error = td_target - q_values[s, a]
+        q_values[s, a] += alpha * td_error
+
--- a/alice_tools/sequence.py
+++ b/alice_tools/sequence.py
@ -0,0 +1,325 @@
+from __future__ import annotations
+import numpy as np
+from typing import List, Tuple, Generator, Optional, Dict
+
+# -----------------------
+# Diagnostics / Utilities
+# -----------------------
+
+def audit_sequence(seq: np.ndarray, k: int) -> dict:
+    """Return basic stats: counts, max run length, first/second half counts."""
+    n = len(seq)
+    counts = np.bincount(seq, minlength=k)
+
+    # Max run length
+    max_run = 1 if n > 0 else 0
+    cur_run = 1
+    for i in range(1, n):
+        if seq[i] == seq[i - 1]:
+            cur_run += 1
+            if cur_run > max_run:
+                max_run = cur_run
+        else:
+            cur_run = 1
+
+    # Half-balance
+    h = n // 2
+    first = np.bincount(seq[:h], minlength=k)
+    second = np.bincount(seq[h:], minlength=k)
+    return {
+        "counts": counts,
+        "max_run": int(max_run),
+        "first_half": first,
+        "second_half": second,
+    }
+
+
+def rolling_tail_state(seq: np.ndarray) -> Tuple[int, int]:
+    """
+    Compute the tail symbol and its current run length for a finished sequence.
+    Use this to "roll" constraints across concatenated chunks.
+
+    Returns:
+        (last_symbol, tail_run_len). If seq is empty => (-1, 0).
+    """
+    if len(seq) == 0:
+        return -1, 0
+    last = int(seq[-1])
+    run_len = 1
+    for i in range(len(seq) - 2, -1, -1):
+        if int(seq[i]) == last:
+            run_len += 1
+        else:
+            break
+    return last, run_len
+
+
+# -----------------------
+# Core builders
+# -----------------------
+
+def _build_targets(
+    n: int,
+    k: int,
+    *,
+    exact_counts: bool,
+    rng: np.random.Generator,
+) -> Tuple[np.ndarray, int]:
+    """
+    Compute per-class target counts and (possibly adjusted) length.
+    If exact_counts=True, we round n down to a multiple of k.
+    Otherwise we keep n and distribute the remainder randomly across symbols.
+    """
+    if exact_counts:
+        n_eff = (n // k) * k
+        base = n_eff // k
+        target = np.full(k, base, dtype=np.int32)
+        return target, n_eff
+
+    # Keep requested length; remainder distributed (random, to avoid bias)
+    n_eff = n
+    base = n // k
+    target = np.full(k, base, dtype=np.int32)
+    r = n % k
+    if r > 0:
+        # Randomly choose which symbols get +1
+        idx = rng.permutation(k)[:r]
+        target[idx] += 1
+    return target, n_eff
+
+
+def _construct_sequence(
+    n: int,
+    k: int,
+    run_cap: int,
+    *,
+    seed: int,
+    exact_counts: bool,
+    half_balance: bool,
+    init_last_symbol: int = -1,
+    init_run_len: int = 0,
+    backtrack_window: int = 32,
+) -> np.ndarray:
+    """
+    Incremental randomized greedy with light backtracking.
+    Enforces:
+      - per-class target counts,
+      - max run length <= run_cap,
+      - optional half-balance (first half counts <= ceil(target/2)).
+
+    Rolling-join guard:
+      You can pass (init_last_symbol, init_run_len) from a previous chunk to
+      ensure the very first choice won't violate run_cap at the boundary.
+    """
+    assert k >= 2, "k must be >= 2"
+    assert run_cap >= 1, "run_cap must be >= 1"
+    rng = np.random.default_rng(seed)
+
+    target, n_eff = _build_targets(n, k, exact_counts=exact_counts, rng=rng)
+
+    seq = np.full(n_eff, -1, dtype=np.int32)
+    counts = np.zeros(k, dtype=np.int32)
+
+    last_sym = int(init_last_symbol)
+    cur_run = int(init_run_len) if init_last_symbol != -1 else 0
+
+    # For half-balance enforcement
+    half_cap = None
+    if half_balance:
+        half_cap = (target + 1) // 2  # ceil(target/2)
+
+    # Backtracking checkpoints
+    stack: List[Tuple[int, np.ndarray, int, int]] = []  # (i, counts_copy, last_sym, cur_run)
+
+    i = 0
+    while i < n_eff:
+        # Build feasible candidate set
+        cand = []
+        for s in range(k):
+            if counts[s] >= target[s]:
+                continue
+            # Run-cap feasibility (respect boundary run)
+            prospective_run = cur_run + 1 if (s == last_sym) else 1
+            if prospective_run > run_cap:
+                continue
+            # Half-balance feasibility (first half only)
+            if half_balance and i < (n_eff // 2):
+                if counts[s] + 1 > half_cap[s]:
+                    continue
+            cand.append(s)
+
+        if not cand:
+            # Backtrack
+            if not stack:
+                raise RuntimeError(
+                    "Gellermann-k construction failed; try relaxing constraints "
+                    f"(n={n}, k={k}, run_cap={run_cap}, half_balance={half_balance}) "
+                    "or change seed."
+                )
+            i, counts, last_sym, cur_run = stack.pop()
+            # Note: we don't need to clear seq entries; we'll overwrite them.
+            continue
+
+        # Prefer least-used symbols; random tie-breakers;
+        rng.shuffle(cand)
+        cand.sort(key=lambda s: (counts[s], 1 if s == last_sym else 0))
+
+        s = cand[0]
+
+        # Occasionally checkpoint state for backtracking
+        if (i % backtrack_window) == 0:
+            stack.append((i, counts.copy(), last_sym, cur_run))
+
+        # Place symbol
+        seq[i] = s
+        counts[s] += 1
+        if s == last_sym:
+            cur_run += 1
+        else:
+            last_sym = s
+            cur_run = 1
+        i += 1
+
+    return seq
+
+
+# -----------------------
+# Public API
+# -----------------------
+
+def gellermann_k(
+    n: int,
+    k: int,
+    run_cap: int = 3,
+    *,
+    seed: int = 1234,
+    exact_counts: bool = False,
+    half_balance: bool = False,
+) -> np.ndarray:
+    """
+    Gellermann-style k-ary generator.
+
+    Args:
+        n: desired sequence length. If exact_counts=True, effective length becomes (n//k)*k.
+        k: alphabet size (>= 2).
+        run_cap: maximum allowed run length per symbol.
+        seed: RNG seed.
+        exact_counts: if True, force exactly equal counts by rounding n down to a multiple of k.
+                      if False, keep n and distribute remainder across symbols.
+        half_balance: if True, enforce counts in the first half <= ceil(target/2) for each symbol.
+
+    Returns:
+        np.ndarray of shape [n_eff] with symbols in 0..k-1
+    """
+    return _construct_sequence(
+        n=n,
+        k=k,
+        run_cap=run_cap,
+        seed=seed,
+        exact_counts=exact_counts,
+        half_balance=half_balance,
+        init_last_symbol=-1,
+        init_run_len=0,
+    )
+
+
+def build_sequence_with_state(
+    n: int,
+    k: int,
+    run_cap: int = 3,
+    *,
+    seed: int = 1234,
+    exact_counts: bool = False,
+    half_balance: bool = False,
+    prev_state: Optional[Tuple[int, int]] = None,
+) -> Tuple[np.ndarray, Tuple[int, int]]:
+    """
+    Construct a sequence and also return its tail state for safe rolling joins.
+
+    Args:
+        prev_state: optional (last_symbol, last_run_len) carried over from a previous chunk.
+
+    Returns:
+        (sequence, end_state) where end_state=(last_symbol, tail_run_len) for this chunk.
+    """
+    last_sym, run_len = (-1, 0) if prev_state is None else (int(prev_state[0]), int(prev_state[1]))
+    seq = _construct_sequence(
+        n=n,
+        k=k,
+        run_cap=run_cap,
+        seed=seed,
+        exact_counts=exact_counts,
+        half_balance=half_balance,
+        init_last_symbol=last_sym,
+        init_run_len=run_len,
+    )
+    end_state = rolling_tail_state(seq if last_sym == -1 else np.concatenate([[last_sym] * run_len, seq]))
+    return seq, end_state
+
+
+def yield_sequence(
+    n: int,
+    k: int,
+    run_cap: int = 3,
+    *,
+    seed: int = 1234,
+    exact_counts: bool = False,
+    half_balance: bool = False,
+    prev_state: Optional[Tuple[int, int]] = None,
+) -> Generator[int, None, None]:
+    """
+    Streaming-style wrapper that yields the sequence symbol-by-symbol.
+    Accepts a (last_symbol, last_run_len) prev_state to enforce a rolling-join guard
+    so concatenating generators never violates run_cap at the boundary.
+
+    Note:
+        Internally builds incrementally with backtracking, then yields.
+        (This keeps the logic robust while presenting a generator API.)
+    """
+    seq, _ = build_sequence_with_state(
+        n=n,
+        k=k,
+        run_cap=run_cap,
+        seed=seed,
+        exact_counts=exact_counts,
+        half_balance=half_balance,
+        prev_state=prev_state,
+    )
+    for s in seq:
+        yield int(s)
+
+
+# -----------------------
+# De Bruijn (exhaustive)
+# -----------------------
+
+def debruijn(k: int, m: int) -> np.ndarray:
+    """
+    de Bruijn sequence for alphabet k and subsequences of length m.
+    Returns an array of length k**m with each length-m subsequence appearing once (on a cycle).
+    """
+    a = [0] * (k * m)
+    sequence: List[int] = []
+
+    def db(t: int, p: int):
+        if t > m:
+            if m % p == 0:
+                sequence.extend(a[1:p + 1])
+        else:
+            a[t] = a[t - p]
+            db(t + 1, p)
+            for j in range(a[t - p] + 1, k):
+                a[t] = j
+                db(t + 1, t)
+
+    db(1, 1)
+    return np.array(sequence, dtype=np.int32)
+
+
+def tile_or_trim(seq: np.ndarray, n: int) -> np.ndarray:
+    """Tile (repeat) or trim a base sequence to length n."""
+    if len(seq) == 0:
+        return seq
+    reps = (n + len(seq) - 1) // len(seq)
+    out = np.tile(seq, reps)[:n]
+    return out
--- a/bench/README.md
+++ b/bench/README.md
@ -0,0 +1,36 @@
+# Bench
+
+Runs a synthetic finite-state “puzzle belt” over a *batch* of boxes.
+
+## Run
+
+```bash
+python -m pip install -r requirements.txt
+. scripts/bench_env.sh
+python bench/run_bench.py
+
+# Bench
+
+- `run_bench.py`: pure speed micro-benchmark (synthetic FSM)
+- `run_curiosity_demo.py`: demonstrates **non-advancing PEEK** and **k-ary sequences**
+  with two puzzle families:
+  - **Informative**: `EAT` is valuable *after* `PEEK`, costly otherwise
+  - **Uninformative**: `PEEK` yields cost but no benefit
+
+Expect higher peek rates in the informative segments only.
+
+# Bench
+
+- `run_bench.py`: pure speed micro-benchmark (synthetic FSM)
+- `run_curiosity_demo.py`: demonstrates **non-advancing PEEK** with **k-ary sequences**,
+  logs a CSV of results per segment
+- `plot_curiosity.py`: reads CSV and renders summary figures into an output directory
+
+## Typical usage
+
+```bash
+python -m pip install -r requirements.txt
+. scripts/bench_env.sh
+python bench/run_curiosity_demo.py --out results/curiosity_demo.csv
+python bench/plot_curiosity.py --in results/curiosity_demo.csv --outdir results/figs
+
--- a/bench/plot_curiosity.py
+++ b/bench/plot_curiosity.py
@ -0,0 +1,219 @@
+from __future__ import annotations
+import argparse, os
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+# ---------- Style helpers ----------
+
+OKABE_ITO = ["#000000", "#E69F00", "#56B4E9", "#009E73",
+             "#F0E442", "#0072B2", "#D55E00", "#CC79A7"]
+
+def ensure_dir(d: str):
+    os.makedirs(d, exist_ok=True)
+
+def apply_accessible_style(high_contrast: bool, font_scale: float, palette: str, large_fonts: bool):
+    """
+    Apply a readable, colorblind-safe theme.
+    """
+    # Base theme
+    ctx = "talk" if (large_fonts or font_scale >= 1.3) else "notebook"
+    sns.set_theme(style="whitegrid", context=ctx)
+    sns.set(font_scale=max(font_scale, 2.2 if large_fonts else font_scale))
+
+    # Palette
+    if palette == "hc":
+        sns.set_palette(OKABE_ITO)
+    else:
+        try:
+            sns.set_palette("colorblind")
+        except Exception:
+            pass  # fall back to mpl defaults
+
+    # Matplotlib rc for readability
+    rc = plt.rcParams
+    rc["figure.facecolor"] = "white"
+    rc["axes.facecolor"] = "white"
+    rc["savefig.facecolor"] = "white"
+    rc["axes.edgecolor"] = "black"
+    rc["axes.grid"] = True
+    rc["grid.color"] = "#D0D0D0"
+    rc["grid.linewidth"] = 0.9 if (large_fonts or high_contrast) else 0.8
+    rc["legend.frameon"] = True
+    rc["legend.framealpha"] = 0.95
+    rc["legend.facecolor"] = "white"
+    rc["legend.edgecolor"] = "#333333"
+    rc["axes.titleweight"] = "bold" if high_contrast else "normal"
+    rc["axes.labelweight"] = "bold" if (large_fonts or high_contrast) else "regular"
+    rc["lines.linewidth"] = 3.2 if (large_fonts or high_contrast) else 2.0
+    rc["lines.markersize"] = 8.5 if (large_fonts or high_contrast) else 6.0
+    rc["xtick.major.size"] = 6 if (large_fonts or high_contrast) else 5
+    rc["ytick.major.size"] = 6 if (large_fonts or high_contrast) else 5
+
+def load_csv(path: str) -> pd.DataFrame:
+    df = pd.read_csv(path)
+    # coerce numeric cols
+    num_cols = ["segment_index","peek_rate","avg_reward_per_box_step","batch","steps_per_segment","S","A",
+                "gamma","alpha","epsilon","cost_pass","cost_peek","cost_eat","seed"]
+    for c in num_cols:
+        if c in df.columns:
+            df[c] = pd.to_numeric(df[c], errors="coerce")
+    # Keep family as categorical with a stable order
+    if "family" in df.columns:
+        order = ["informative", "uninformative"]
+        cats = [x for x in order if x in df["family"].unique().tolist()]
+        df["family"] = pd.Categorical(df["family"], categories=cats, ordered=True)
+    return df
+
+# Seaborn 0.12/0.13 compatibility: prefer errorbar=('ci',95), fallback to ci=95
+def _barplot_with_ci(df: pd.DataFrame, x: str, y: str, title: str,
+                     annotate: bool, value_fmt: str):
+    try:
+        ax = sns.barplot(data=df, x=x, y=y, estimator=np.mean, errorbar=('ci', 95))
+    except TypeError:
+        ax = sns.barplot(data=df, x=x, y=y, estimator=np.mean, ci=95)
+    plt.title(title)
+    plt.xlabel("")
+    plt.tight_layout()
+
+    if annotate:
+        _annotate_bars(ax, fmt=value_fmt)
+
+def _annotate_bars(ax: plt.Axes, fmt: str = ".3f"):
+    """
+    Annotate each bar with its height (value). Assumes a simple single-hue bar plot.
+    """
+    # Compute an offset proportional to axis span
+    ymin, ymax = ax.get_ylim()
+    offset = 0.01 * (ymax - ymin)
+    for patch in ax.patches:
+        height = patch.get_height()
+        if np.isnan(height):
+            continue
+        x = patch.get_x() + patch.get_width() / 2
+        ax.text(x, height + offset, format(height, fmt),
+                ha="center", va="bottom", fontsize=max(10, plt.rcParams['font.size'] * 0.9),
+                fontweight="bold")
+
+# ---------- Plotters ----------
+
+def plot_peek_rate_by_segment(df: pd.DataFrame, outdir: str, dpi: int, fmt: str, transparent: bool):
+    plt.figure(figsize=(10.5,5.2))
+    sns.lineplot(data=df, x="segment_index", y="peek_rate", hue="family", marker="o")
+    plt.title("Peek rate by segment")
+    plt.xlabel("Segment")
+    plt.ylabel("Peek rate (fraction of actions)")
+    plt.tight_layout()
+    p = os.path.join(outdir, f"peek_rate_by_segment.{fmt}")
+    plt.tight_layout()
+    plt.savefig(p, dpi=dpi, transparent=transparent)
+    plt.close()
+    return p
+
+def plot_reward_by_segment(df: pd.DataFrame, outdir: str, dpi: int, fmt: str, transparent: bool):
+    plt.figure(figsize=(10.5,5.2))
+    sns.lineplot(data=df, x="segment_index", y="avg_reward_per_box_step", hue="family", marker="o")
+    plt.title("Average reward per box-step by segment")
+    plt.xlabel("Segment")
+    plt.ylabel("Avg reward per box-step")
+    plt.tight_layout()
+    p = os.path.join(outdir, f"avg_reward_by_segment.{fmt}")
+    plt.tight_layout()
+    plt.savefig(p, dpi=dpi, transparent=transparent)
+    plt.close()
+    return p
+
+def plot_summary_bars(df: pd.DataFrame, outdir: str, dpi: int, fmt: str, transparent: bool,
+                      annotate: bool, value_fmt: str):
+    plt.figure(figsize=(7.4,5.4))
+    _barplot_with_ci(df, x="family", y="peek_rate",
+                     title="Mean peek rate by family (95% CI)",
+                     annotate=annotate, value_fmt=value_fmt)
+    plt.ylabel("Peek rate")
+    p1 = os.path.join(outdir, f"summary_peek_rate.{fmt}")
+    plt.savefig(p1, dpi=dpi, transparent=transparent)
+    plt.close()
+
+    plt.figure(figsize=(7.4,5.4))
+    _barplot_with_ci(df, x="family", y="avg_reward_per_box_step",
+                     title="Mean avg reward per box-step by family (95% CI)",
+                     annotate=annotate, value_fmt=value_fmt)
+    plt.ylabel("Avg reward per box-step")
+    p2 = os.path.join(outdir, f"summary_avg_reward.{fmt}")
+    plt.tight_layout()
+    plt.savefig(p2, dpi=dpi, transparent=transparent)
+    plt.close()
+    return p1, p2
+
+def plot_reward_vs_peek(df: pd.DataFrame, outdir: str, dpi: int, fmt: str, transparent: bool):
+    plt.figure(figsize=(8.0,6.4))
+    sns.scatterplot(data=df, x="peek_rate", y="avg_reward_per_box_step", hue="family",
+                    s=80, edgecolor="k", linewidth=0.6)
+    # Trend lines per family (no CIs to keep it uncluttered)
+    sns.regplot(data=df[df["family"]=="informative"], x="peek_rate", y="avg_reward_per_box_step",
+                scatter=False, ci=None, truncate=True, line_kws={"linewidth": 3})
+    sns.regplot(data=df[df["family"]=="uninformative"], x="peek_rate", y="avg_reward_per_box_step",
+                scatter=False, ci=None, truncate=True, line_kws={"linewidth": 3})
+    plt.title("Reward vs. Peek rate")
+    plt.xlabel("Peek rate")
+    plt.ylabel("Avg reward per box-step")
+    plt.tight_layout()
+    p = os.path.join(outdir, f"reward_vs_peek_scatter.{fmt}")
+    plt.tight_layout()
+    plt.savefig(p, dpi=dpi, transparent=transparent)
+    plt.close()
+    return p
+
+# ---------- CLI ----------
+
+def main():
+    ap = argparse.ArgumentParser(description="Plot curiosity demo CSV with accessible styling.")
+    ap.add_argument("--in", dest="inp", type=str, required=True, help="Input CSV from run_curiosity_demo.py")
+    ap.add_argument("--outdir", type=str, default="results/figs", help="Directory to save figures")
+    ap.add_argument("--high_contrast", action="store_true", help="Use high-contrast, bold styling")
+    ap.add_argument("--large_fonts", action="store_true", help="Use extra-large fonts and thicker lines")
+    ap.add_argument("--font_scale", type=float, default=1.6, help="Base font scale (ignored if --large_fonts is bigger)")
+    ap.add_argument("--palette", type=str, default="auto", choices=["auto","hc"], help="Color palette: auto=colorblind, hc=Okabe–Ito")
+    ap.add_argument("--dpi", type=int, default=180, help="Figure DPI")
+    ap.add_argument("--format", type=str, default="png", choices=["png","pdf","svg"], help="Output format")
+    ap.add_argument("--transparent", action="store_true", help="Save figures with transparent background")
+    ap.add_argument("--no_annotate", action="store_true", help="Disable numeric labels on bar charts")
+    ap.add_argument("--value_fmt", type=str, default=".3f", help="Number format for bar labels (e.g., .2f, .1% not supported)")
+    args = ap.parse_args()
+
+    ensure_dir(args.outdir)
+    apply_accessible_style(high_contrast=args.high_contrast,
+                           font_scale=args.font_scale,
+                           palette=args.palette,
+                           large_fonts=args.large_fonts)
+
+    df = load_csv(args.inp)
+    print(f"Loaded {len(df)} rows from {args.inp}")
+
+    # Console summary (accessible)
+    grp = df.groupby("family").agg(
+        mean_peek=("peek_rate","mean"),
+        std_peek=("peek_rate","std"),
+        mean_reward=("avg_reward_per_box_step","mean"),
+        std_reward=("avg_reward_per_box_step","std"),
+        n=("peek_rate","count")
+    )
+    print("\nSummary by family:\n", grp)
+
+    annotate = (not args.no_annotate)
+
+    paths = []
+    paths.append(plot_peek_rate_by_segment(df, args.outdir, args.dpi, args.format, args.transparent))
+    paths.append(plot_reward_by_segment(df, args.outdir, args.dpi, args.format, args.transparent))
+    p1, p2 = plot_summary_bars(df, args.outdir, args.dpi, args.format, args.transparent,
+                               annotate=annotate, value_fmt=args.value_fmt)
+    paths.extend([p1, p2])
+    paths.append(plot_reward_vs_peek(df, args.outdir, args.dpi, args.format, args.transparent))
+
+    print("\nSaved figures:")
+    for p in paths:
+        print(" -", p)
+
+if __name__ == "__main__":
+    main()
--- a/bench/run_bench.py
+++ b/bench/run_bench.py
@ -0,0 +1,76 @@
+## `bench/run_bench.py`
+from __future__ import annotations
+import time
+import numpy as np
+from alice_fast.batched_belt import BatchedBelt
+from alice_fast.kernels import PASS, PEEK, EAT
+
+def make_synthetic_fsm(S=128, A=3, seed=7):
+    rng = np.random.default_rng(seed)
+    tt = rng.integers(0, S, size=(S, A), dtype=np.int32)
+    rt = np.full((S, A, S), -0.01, dtype=np.float32)
+    goal_states = rng.choice(S, size=max(1, S // 8), replace=False)
+    for gs in goal_states:
+        rt[:, EAT, gs] = 1.0
+    costs = np.array([-0.02, -0.05, 0.0], dtype=np.float32)
+    return tt, rt, costs
+
+def bench(belt: BatchedBelt, steps: int, warmup: int = 200):
+    for _ in range(warmup):
+        belt.step_learn()
+    t0 = time.perf_counter()
+    for _ in range(steps):
+        belt.step_learn()
+    t1 = time.perf_counter()
+    return t1 - t0
+
+def main():
+    S, A, B = 128, 3, 4096
+    STEPS = 2000
+
+    tt, rt, costs = make_synthetic_fsm(S=S, A=A)
+    belt = BatchedBelt(S, A, tt, rt, costs, batch_size=B, gamma=0.97, alpha=0.2, epsilon=0.05, seed=42)
+
+    t = bench(belt, STEPS)
+    steps_per_sec = (B * STEPS) / t
+    print(f"[Batched+Numba]  {steps_per_sec:,.0f} box-steps/sec (B={B}, steps={STEPS}, elapsed={t:.3f}s)")
+
+    # Naive Python for rough reference (kept intentionally slow)
+    SLOW_STEPS = 200
+    slow_states = np.zeros(B, dtype=np.int32)
+    slow_q = np.zeros((S, A), dtype=np.float32)
+    rng = np.random.default_rng(123)
+
+    def slow_step():
+        nonlocal slow_states, slow_q
+        actions = np.empty(B, dtype=np.int32)
+        for i in range(B):
+            if rng.random() < 0.05:
+                actions[i] = rng.integers(0, A)
+            else:
+                actions[i] = int(np.argmax(slow_q[slow_states[i]]))
+        next_states = np.empty_like(slow_states)
+        rewards = np.empty(B, dtype=np.float32)
+        for i in range(B):
+            s, a = int(slow_states[i]), int(actions[i])
+            ns = rng.integers(0, S)
+            r = (-0.01) + (1.0 if (a == 2 and rng.random() < 0.05) else 0.0)
+            next_states[i] = ns
+            rewards[i] = r
+        for i in range(B):
+            s, a, ns = int(slow_states[i]), int(actions[i]), int(next_states[i])
+            td_target = rewards[i] + 0.97 * np.max(slow_q[ns])
+            slow_q[s, a] += 0.2 * (td_target - slow_q[s, a])
+        slow_states = next_states
+
+    t0 = time.perf_counter()
+    for _ in range(SLOW_STEPS):
+        slow_step()
+    t1 = time.perf_counter()
+    slow_steps_per_sec = (B * SLOW_STEPS) / (t1 - t0)
+    print(f"[Naive Python]   {slow_steps_per_sec:,.0f} box-steps/sec (B={B}, steps={SLOW_STEPS})")
+    print(f"Speedup (approx): {(steps_per_sec / slow_steps_per_sec):.1f}×")
+
+if __name__ == "__main__":
+    main()
+
--- a/bench/run_curiosity_demo.py
+++ b/bench/run_curiosity_demo.py
@ -0,0 +1,139 @@
+from __future__ import annotations
+import argparse, csv, os
+from datetime import datetime
+import numpy as np
+from alice_fast.batched_belt import BatchedBelt
+from alice_fast.kernels import PASS, PEEK, EAT
+from alice_tools.sequence import gellermann_k, audit_sequence
+
+"""
+Curiosity demo with CSV logging.
+
+Two puzzle families:
+  0 = Informative: PEEK (non-advancing) makes EAT good; without PEEK, EAT is bad.
+  1 = Uninformative: PEEK costs but does not change EAT value.
+
+We encode "non-advancing" by augmenting state:
+  S=2 states per puzzle: 0=unpeeked, 1=peeked.
+  PEEK: 0->1, 1->1 (information state only)
+  EAT: returns to 0; reward depends on family+state
+  PASS: resets to unpeeked (small cost).
+"""
+
+def build_tables_informative():
+    S, A = 2, 3
+    tt = np.zeros((S, A), dtype=np.int32)
+    tt[:, PASS] = 0
+    tt[0, PEEK] = 1
+    tt[1, PEEK] = 1
+    tt[:, EAT] = 0
+
+    rt = np.zeros((S, A, S), dtype=np.float32)
+    base_costs = np.array([-0.02, -0.05, 0.0], dtype=np.float32)
+
+    rt[0, EAT, 0] = -0.25   # uninformed 'eat' is risky/bad
+    rt[1, EAT, 0] = 1.0     # informed 'eat' is good
+    return S, A, tt, rt, base_costs
+
+def build_tables_uninformative():
+    S, A = 2, 3
+    tt = np.zeros((S, A), dtype=np.int32)
+    tt[:, PASS] = 0
+    tt[0, PEEK] = 1
+    tt[1, PEEK] = 1
+    tt[:, EAT] = 0
+
+    rt = np.zeros((S, A, S), dtype=np.float32)
+    base_costs = np.array([-0.02, -0.05, 0.0], dtype=np.float32)
+
+    rt[0, EAT, 0] = 0.30    # same payoff whether peeked or not
+    rt[1, EAT, 0] = 0.30
+    return S, A, tt, rt, base_costs
+
+def run_segment(belt: BatchedBelt, steps: int):
+    total_reward = 0.0
+    total_peeks = 0
+    total_actions = 0
+    for _ in range(steps):
+        out = belt.step_learn()
+        total_reward += float(out["rewards"].sum())
+        total_peeks += int(np.sum(out["actions"] == PEEK))
+        total_actions += out["actions"].size
+    return {
+        "avg_reward_per_box_step": total_reward / total_actions,
+        "peek_rate": total_peeks / total_actions
+    }
+
+def ensure_parent(path: str):
+    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--out", type=str, default=None, help="CSV output path (default: results/curiosity_demo_YYYYmmdd-HHMMSS.csv)")
+    ap.add_argument("--segments", type=int, default=20, help="Number of segments")
+    ap.add_argument("--steps_per_segment", type=int, default=1000, help="Steps per segment")
+    ap.add_argument("--batch", type=int, default=4096, help="Batch size")
+    ap.add_argument("--seed", type=int, default=7, help="Base RNG seed")
+    args = ap.parse_args()
+
+    if args.out is None:
+        stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+        args.out = f"results/curiosity_demo_{stamp}.csv"
+    ensure_parent(args.out)
+
+    # Build families
+    S0, A0, tt0, rt0, costs0 = build_tables_informative()
+    S1, A1, tt1, rt1, costs1 = build_tables_uninformative()
+    assert (S0, A0) == (S1, A1)
+    S, A = S0, A0
+
+    # Two belts (same shape, different reward tables)
+    belt_inf  = BatchedBelt(S, A, tt0, rt0, costs0, batch_size=args.batch, gamma=0.97, alpha=0.2, epsilon=0.05, seed=args.seed)
+    belt_uninf= BatchedBelt(S, A, tt1, rt1, costs1, batch_size=args.batch, gamma=0.97, alpha=0.2, epsilon=0.05, seed=args.seed+1)
+
+    # k=2 families, balanced, limited runs
+    seq = gellermann_k(n=args.segments, k=2, run_cap=3, seed=args.seed)
+    audit = audit_sequence(seq, k=2)
+    print("Sequence (0=informative, 1=uninformative):", seq.tolist())
+    print("Audit:", audit)
+
+    # CSV header
+    header = [
+        "segment_index", "family", "peek_rate", "avg_reward_per_box_step",
+        "batch", "steps_per_segment", "S", "A",
+        "gamma", "alpha", "epsilon",
+        "cost_pass", "cost_peek", "cost_eat",
+        "seed"
+    ]
+    with open(args.out, "w", newline="") as f:
+        w = csv.writer(f)
+        w.writerow(header)
+
+        for i, sym in enumerate(seq):
+            if sym == 0:
+                res = run_segment(belt_inf, args.steps_per_segment)
+                fam = "informative"
+                c = costs0
+            else:
+                res = run_segment(belt_uninf, args.steps_per_segment)
+                fam = "uninformative"
+                c = costs1
+
+            row = [
+                i, fam,
+                f"{res['peek_rate']:.6f}", f"{res['avg_reward_per_box_step']:.6f}",
+                args.batch, args.steps_per_segment, S, A,
+                0.97, 0.2, 0.05,
+                float(c[0]), float(c[1]), float(c[2]),
+                args.seed
+            ]
+            w.writerow(row)
+
+            print(f"Seg {i:02d} [{fam[:5].upper()}]  peek_rate={res['peek_rate']:.3f}  "
+                  f"avg_reward/step={res['avg_reward_per_box_step']:.4f}")
+
+    print(f"\nWrote CSV → {args.out}")
+
+if __name__ == "__main__":
+    main()
+