Copied Jory's repo in

This commit is contained in:
Diane Blackwood 2025-09-20 14:53:13 -04:00
parent 1c295d9c40
commit 67b9c88cba
27 changed files with 4549 additions and 2 deletions

README.md

@@ -1,3 +1,26 @@
# alice
# curiosity
### quickstart
* `. ./jupyter.sh` runs jupyter-lab (setting everything up if necessary)
Note: run scripts with `source <scriptname>` or `. ./<scriptname>` so they can modify your current shell.
### helper scripts
* `. ./update_env.sh` creates or updates the project python environment
* `. ./activate_env.sh` activates the project environment (calls update if missing)
* `. ./deactivate_env.sh` deactivates the project environment
* `. ./jupyter.sh` runs jupyter-lab (calling activate for safety)
### structure
```
├── code
│   ├── agents/ # agent algorithms
│   ├── environments/ # test environments
│   └── evolve.py # sample evolution code
├── notebooks/ # example notebooks
├── papers/ # useful shared docs
├── requirements-conda.txt # conda project dependencies
├── requirements-pip.txt # pip project dependencies (sometimes necessary)
```
ALICE is a project to explore curiosity in a model incorporating both reinforcement learning and evolutionary processes.

activate_env.sh Normal file

@@ -0,0 +1,13 @@
# deactivate a conda env, in case one is active
conda deactivate &> /dev/null
# deactivate a micromamba env, in case one is active
micromamba deactivate &> /dev/null
UMAMBA_PATH="umamba_env"
if [ ! -d "$UMAMBA_PATH" ]; then
echo "no $UMAMBA_PATH found"
. ./update_env.sh
fi
export MAMBA_ROOT_PREFIX=$PWD/$UMAMBA_PATH
eval "$(./$UMAMBA_PATH/micromamba shell hook -s posix)"
micromamba activate curio

code/__init__.py Normal file (empty)

code/agents/q_agent.py Executable file

@@ -0,0 +1,365 @@
"""
q_agent.py
This submodule contains the Agent class, which implements a Q-learning agent with eligibility traces (TD-lambda). The agent learns to make decisions based on its sensory state and rewards received from the environment. The agent uses an epsilon-greedy action-selection strategy.
Usage:
import q_agent
Class:
Agent(obsSpace, actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.96)
Attributes:
obsSpace (tuple): The shape of the observation space.
actSpace (tuple): The shape of the action space.
ftrSpace (tuple): The shape of the feature space.
n_features (int): The total number of features.
n_actions (int): The total number of actions.
weights (numpy.ndarray): The Q-function weights.
trace (numpy.ndarray): The eligibility trace for each feature.
featureToIndexMap (numpy.ndarray): A mapping from feature indices to the corresponding weights.
allActions (list): A list of all possible actions.
alpha (float): The learning rate for updating weights.
gamma (float): The discount factor for future rewards.
epsilon (float): The exploration rate for epsilon-greedy action selection.
lmbda (float): The decay factor for eligibility traces.
sensoryState (numpy.ndarray): The current sensory state of the agent.
previousSensoryState (numpy.ndarray): The previous sensory state of the agent.
action (int): The current action taken by the agent.
previousAction (int): The previous action taken by the agent.
episoden (int): The episode number the agent is in.
recentReset (bool): Indicates if the agent was recently reset.
Methods:
reset():
Resets the agent's traces, sensory states, and actions.
predictPayoffsForAllActions() -> List[float]:
Predicts the expected payoffs for all possible actions given the current sensory state.
plasticUpdate():
Updates the agent's Q-function weights and eligibility traces based on the current sensory state, action, and received reward. Uses epsilon-greedy action selection.
staticUpdate():
Updates the agent's action based on the current sensory state without updating weights or traces. Uses greedy action selection.
Examples:
>>> from q_agent import Agent
>>> obsSpace, actSpace = (2, 2), (3,)
>>> agent = Agent(obsSpace=obsSpace, actSpace=actSpace)
"""
import traceback
import numpy as np
from collections import defaultdict
from typing import List, Tuple, Union
from deap import creator, base, tools, algorithms
LOGGING = False
import logging, sys
logging.basicConfig(stream=sys.stdout,level=logging.INFO)
log = logging.getLogger()
if not LOGGING:
# remove all logging functionality
for handler in log.handlers.copy():
try:
log.removeHandler(handler)
except ValueError: # in case another thread has already removed it
pass
log.addHandler(logging.NullHandler())
log.propagate = False
# The Agent class, similar to what
# is used in MABE. Note: this is unlike
# how standard RLML folks structure these
# algorithms. Here, we separate out concerns
# for modularity. A side-effect is that the
# update() (one cognitive step) receives the reward
# for the previous update-action. This means 1 extra
# update must be called if terminating.
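# Illustrative interaction loop (a sketch of the intended use, assuming a
# gym-style environment with reset()/step() as used elsewhere in this repo):
#
#   agent = Agent(obsSpace=(2, 2), actSpace=(3,))
#   state = env.reset()
#   while True:
#       agent.sensoryState = state
#       agent.plasticUpdate()      # consumes the reward from the PREVIOUS step
#       state, reward, done, _ = env.step(agent.action)
#       agent.reward = reward
#       if done:
#           agent.plasticUpdate()  # the 1 extra update mentioned above
#           break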
class Agent():
def __init__(i, obsSpace, actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.96):
i.obsSpace = np.array(obsSpace)
i.actSpace = np.array(actSpace)
i.ftrSpace = tuple(obsSpace)+tuple(actSpace)
i.n_features = np.prod(i.ftrSpace)
i.n_actions = actSpace[0] # not general
i.weights = np.zeros(i.n_features)
i.trace = np.zeros(i.n_features)
i.featureToIndexMap = np.arange(i.n_features).reshape(i.ftrSpace)
i.allActions = list(range(i.n_actions))
# new
i.alpha = alpha # learning rate: how strongly to weigh reward surprises that deviate from expectation
i.gamma = gamma # discount factor: how important expected future rewards are
i.epsilon = epsilon # exploration rate: how often to choose a random action instead of the best-known one
i.lmbda = lmbda # trace decay: how strongly preceding actions share credit during learning
i.sensoryState = np.zeros(len(i.obsSpace),dtype=np.int32)
i.previousSensoryState = np.zeros(len(i.obsSpace),dtype=np.int32)
i.action = 0
i.previousAction = 0
i.episoden = 0
i.reward = -1 # default reward before the first environment step (reset() also sets this)
i.recentReset = True
def reset(i): # soft reset: clears traces, states, and actions but keeps the learned weights
log.info("resetting agent")
i.trace = np.zeros(i.n_features)
i.sensoryState = np.zeros(len(i.obsSpace),dtype=np.int32)
i.previousSensoryState = np.zeros(len(i.obsSpace),dtype=np.int32)
i.action = 0
i.previousAction = 0
i.reward = -1
i.recentReset = True
def predictPayoffsForAllActions(i) -> List[float]:
'''combines current sensoryState and all possible actions to return all possible payoffs by action
>>> obsSpace, actSpace, ftrSpace = (2,2), (3,), (2,2)+(3,)
>>> i = Agent(obsSpace=obsSpace, actSpace=actSpace)
>>> (i.featureToIndexMap == np.arange(i.n_features).reshape((2,2,3))).all()
True
>>> i.sensoryState[:] = [1,0]
>>> i.weights = np.zeros(12)
>>> i.weights[6:9] = [1.,2.,3.] # weights associated with features (1,0,<action>) with actions 0,1,2
>>> i.predictPayoffsForAllActions()
[1.0, 2.0, 3.0]
'''
#print(i.sensoryState, i.allActions)
try:
featureKeys = [tuple(i.sensoryState)+(action,) for action in i.allActions]
# featuresForEachAction = [i.featureToIndexMap[tuple(i.sensoryState)+(action,)] for action in i.allActions]
featuresForEachAction = [i.featureToIndexMap[fki] for fki in featureKeys]
#print('featureToIndexMap', i.featureToIndexMap)
#print('featureKeys', featureKeys)
#print('sensoryState', i.sensoryState, 'allActions', i.allActions)
return [i.weights[features].sum() for features in featuresForEachAction]
except Exception:
estr = f"Error: {traceback.format_exc()}"
print(estr)
print('featureToIndexMap', i.featureToIndexMap)
print('featureKeys', featureKeys)
print('sensoryState', i.sensoryState, 'allActions', i.allActions)
return [np.nan for x in range(len(i.allActions))]
def plasticUpdate(i):
# This algorithm is a TD-lambda algorithm
# with epsilon-greedy action-selection
# (could use annealing of the epsilon - I removed it again)
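# Sketch of the update implemented below (TD(lambda) with linear function
# approximation over one-hot state-action features):
#   surprise = r + gamma * Q(s', a') - Q(s, a)
#   trace[features(s, a)] = 1.0
#   weights += alpha * surprise * trace
#   trace *= lambda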
# determine predicted payoff
nextActionPredictedPayoff = 0.0 # used to find surprise between expected and received payoff
nextAction = 0
# epsilon-greedy action-selection
# choose random
if np.random.random() < i.epsilon: # random
nextAction = np.random.choice(i.n_actions)
else: # choose best
try:
q_vals = i.predictPayoffsForAllActions()
nextAction = np.argmax(q_vals)
if i.reward >= 0.0: # goal achieved
nextActionPredictedPayoff = 0.0
else:
nextActionPredictedPayoff = q_vals[nextAction]
except Exception:
estr = f"Error: {traceback.format_exc()}"
print(estr)
print("q_vals", q_vals)
# only update weights if accumulated at least 1 experience
if not i.recentReset:
# determine the corrected payoff version given the reward actually received
previousActionCorrectedPayoff = i.reward + (nextActionPredictedPayoff * i.gamma)
# use this information to update weights for last action-selection based on how surprised we were
features = i.featureToIndexMap[tuple(i.previousSensoryState)+(i.action,)]
previousActionPredictedPayoff = i.weights[features].sum()
surprise = previousActionCorrectedPayoff - previousActionPredictedPayoff
# mark the features of the previous state-action pair as eligible
i.trace[features] = 1.0
# update weights in proportion to surprise and eligibility, then decay the trace
i.weights += i.alpha * surprise * i.trace
i.trace *= i.lmbda
# keep track of state and action t, t-1
i.previousSensoryState = np.array(i.sensoryState) # explicit copy ([:] on a numpy array returns a view)
i.action = nextAction
i.recentReset = False
def staticUpdate(i):
# same as plasticUpdate, but without learning
# (a.k.a. 'deployment')
# determine predicted payoff
nextActionPredictedPayoff = 0.0 # used to find surprise between expected and received payoff
nextAction = 0
# greedy action-selection
q_vals = i.predictPayoffsForAllActions()
nextAction = np.argmax(q_vals)
# step the storage of state and action in memory
i.previousSensoryState = np.array(i.sensoryState) # explicit copy ([:] on a numpy array returns a view)
i.action = nextAction
"""
This derived class adds a mutation_rate attribute, as well as methods for mutation, crossover, and fitness handling. You can then use an evolutionary algorithm to evolve a population of EvolvableAgent instances by applying selection, crossover, and mutation operations based on the agents' fitness values.
"""
def tuple_shape(input_tuple):
if not isinstance(input_tuple, tuple):
try:
return input_tuple.shape
except AttributeError:
raise TypeError("Input must be a tuple")
# Check if the tuple is nested (i.e., if it's a multidimensional tuple)
if any(isinstance(item, tuple) for item in input_tuple):
shape = []
while isinstance(input_tuple, tuple):
shape.append(len(input_tuple))
input_tuple = input_tuple[0]
return tuple(shape)
else:
return (len(input_tuple),)
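# e.g. tuple_shape(((1, 2), (3, 4), (5, 6))) -> (3, 2); tuple_shape((7, 8)) -> (2,)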
class Holder(object):
def __init__(self):
pass
class EvolvableAgent(Agent):
""" EvolvableAgent
This class extends the Agent class from q_agent.py, adding functionality for evolutionary computation. The EvolvableAgent class can be used with evolutionary algorithms to optimize the agent's performance through mutation, crossover, and selection based on fitness values.
Usage:
import EvolvableAgent
Class:
EvolvableAgent(obsSpace, actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.96, mutation_rate=0.05)
Attributes (in addition to Agent attributes):
mutation_rate (float): The probability of each weight being mutated during mutation.
fitness (float): The fitness value of the agent, used for evaluation and selection in an evolutionary algorithm.
Methods (in addition to Agent methods):
mutate():
Mutates the agent's weights by adding small random values, drawn from a normal distribution. The mutation_rate attribute determines the probability of each weight being mutated.
crossover(other: 'EvolvableAgent') -> 'EvolvableAgent':
Performs uniform crossover between this agent and another agent, creating a new offspring agent.
Args:
other (EvolvableAgent): The other agent to perform crossover with.
Returns:
EvolvableAgent: The offspring agent resulting from the crossover.
set_fitness(fitness: float):
Sets the fitness value for the agent.
Args:
fitness (float): The fitness value to be set.
get_fitness() -> float:
Gets the fitness value of the agent.
Returns:
float: The fitness value of the agent.
Examples:
>>> from EvolvableAgent import EvolvableAgent
>>> obsSpace, actSpace = (2, 2), (3,)
>>> agent = EvolvableAgent(obsSpace=obsSpace, actSpace=actSpace, mutation_rate=0.05)
"""
def __init__(self, obsSpace, actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.96, \
mutation_rate=0.05, crossover_rate=0.01, fitness=None):
# obsSpace, actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.96
super().__init__(obsSpace, actSpace, alpha, gamma, epsilon, lmbda)
self.germline = self.weights
self.mutation_rate = mutation_rate
self.crossover_rate = crossover_rate
self.wfitness = None
self.fitness = fitness
self.init_fitness = fitness
def mutate(self):
"""
Mutate the agent's weights by adding small random values, drawn from a normal distribution.
The mutation_rate attribute determines the probability of each weight being mutated.
"""
wtshape = self.weights.shape
glshape = self.germline.shape
mutation_mask = np.random.random(self.germline.shape) < self.mutation_rate
self.germline[mutation_mask] += np.random.normal(loc=0, scale=0.01, size=np.sum(mutation_mask))
self.weights = self.germline
assert glshape == self.germline.shape, "Error: mutate() germline shape has changed"
assert wtshape == self.weights.shape, "Error: mutate() weights shape has changed"
def crossover(self, other: 'EvolvableAgent') -> 'EvolvableAgent':
"""
Perform uniform crossover between this agent and another agent, creating a new offspring agent.
Args:
other (EvolvableAgent): The other agent to perform crossover with.
Returns:
EvolvableAgent: The offspring agent resulting from the crossover.
"""
wtshape = self.weights.shape
glshape = self.germline.shape
offspring = EvolvableAgent(self.obsSpace, self.actSpace, self.alpha, self.gamma, self.epsilon, self.lmbda, self.mutation_rate, self.crossover_rate, self.init_fitness)
if np.random.random() <= self.crossover_rate:
crossover_mask = np.random.randint(0, 2, size=self.germline.shape, dtype=bool)
offspring.germline = np.where(crossover_mask, self.germline, other.germline)
else:
offspring.germline = self.germline
offspring.weights = offspring.germline
assert self.obsSpace.shape == offspring.obsSpace.shape, f"Error: offspring has different obsSpace {offspring.obsSpace} != {self.obsSpace}"
assert self.actSpace.shape == offspring.actSpace.shape, f"Error: offspring has different actSpace {offspring.actSpace} != {self.actSpace}"
assert tuple_shape(self.ftrSpace) == tuple_shape(offspring.ftrSpace), f"Error: offspring had different ftrSpace {offspring.ftrSpace} {offspring.obsSpace} {offspring.actSpace} != {self.ftrSpace} {self.obsSpace} {self.actSpace}"
assert glshape == offspring.germline.shape, "Error: offspring germline shape has changed"
assert wtshape == offspring.weights.shape, "Error: offspring weights shape has changed"
return offspring
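# Illustrative generational step using these operators (a sketch; the
# project's real evolutionary loop lives in curio_evolve_weights.py):
#   ranked = sorted(pop, key=lambda a: a.get_wfitness(), reverse=True)
#   child = ranked[0].crossover(ranked[1])
#   child.mutate()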
def set_wfitness(self, fitness: float):
"""
Set the fitness value for the agent.
Args:
fitness (float): The fitness value to be set.
"""
self.wfitness = fitness
def get_wfitness(self) -> float:
"""
Get the fitness value of the agent.
Returns:
float: The fitness value of the agent.
"""
return self.wfitness
def set_fitness(self, fitness: float):
"""
Set the fitness value for the agent.
Args:
fitness (float): The fitness value to be set.
"""
self.fitness.values = (fitness,)
def get_fitness(self) -> float:
"""
Get the fitness value of the agent.
Returns:
float: The fitness value of the agent.
"""
return self.fitness.values[0]
if __name__ == '__main__':
'''test important functions and workflows with doctesting
run this python file by itself to run these tests, and set
LOGGING=True near top of file.'''
import doctest
from functools import partial
#doctest.testmod()
test = partial(doctest.run_docstring_examples, globs=globals())
test(Agent.predictPayoffsForAllActions)

code/curio_evolve_weights.py Executable file

@@ -0,0 +1,341 @@
"""
curio_evolve_weights.py (formerly ew.py)
Evolve Weights
Uses DEAP to evolve a set of weights with mutation and crossover.
Integration with other code happens via programming by contract.
The 'environ' parameter must be an object that provides two
methods:
get_weights_len : returns a scalar integer giving the length of the 1D weight vector
evaluate : accepts a weight vector and returns a single-element fitness tuple (e.g., (0.5,))
and has one reinforcement-learning attribute for agents:
alpha : the agents' learning rate
"""
import sys
# allow importing from the 'code/' dir
sys.path.append("../code")
import os
import platform
import pickle
import json
import traceback
import datetime
import copy
import numpy as np, itertools
import matplotlib.pyplot as plt
from collections import defaultdict
import importlib # module reloading
#import environments
#import agents
# always forces a reload in case you have edited environments or agents
#importlib.reload(environments)
#importlib.reload(agents)
#from environments.gridworld import GridWorld
#import environments.puzzle as pz
#from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace
#from agents.q_agent import EvolvableAgent as Agent
# DEAP imports
import random
from deap import creator, base, tools, algorithms
import multiprocessing
#pool = multiprocessing.Pool()
#toolbox.register("map", pool.map)
# Weight handling
#from mda import MultiDimArray
def isotime():
return datetime.datetime.now().isoformat()
def t2fn(timestamp):
timestamp = timestamp.replace('.','_')
timestamp = timestamp.replace(':','_')
return timestamp
class Holder(object):
def __init__(self):
pass
class EvolveWeights(object):
"""
Class to apply DEAP to evolve a population consisting of a set
of weights.
"""
def __init__(self,
# environ, # Instance of environ class
# What is needed from environ?
# weights_len (int)
# alpha (float)
# evaluate (method/function)
weights_len,
alpha=0.05,
evaluate=None,
popsize=100,
maxgenerations=10000,
cxpb=0.5,
mtpb=0.05,
wmin=-20.0,
wmax=20.0,
mut_center=0.0,
mut_sigma=0.1,
mut_indpb=0.05,
tournsize=5,
tournk=2,
normalize_fitness=True,
tag='environ'
):
self.tag = tag
self.starttime = isotime()
self.logbase = tag + "_" + t2fn(self.starttime)
# Excluding environment as a parameter
# self.environ = environ
# Instead, we need to pass in weights_len, alpha, evaluate
self.weights_len = weights_len # environ.get_weights_len()
self.alpha = alpha
self.evaluate = evaluate
self.popsize = popsize
self.maxgenerations = maxgenerations
self.cxpb = cxpb
self.mtpb = mtpb
self.wmin = wmin
self.wmax = wmax
self.mut_center = mut_center
self.mut_sigma = mut_sigma
self.mut_indpb = mut_indpb
self.tournsize = tournsize
self.tournk = tournk
self.normalize_fitness = normalize_fitness
pass
def masv(self, pop):
'''mean absolute weight of each individual, scaled by the population's largest absolute weight'''
mav = []
maxs = []
for ind in pop:
wts = [x for x in ind]
mav.append(np.mean(np.abs(wts)))
maxs.append(np.max(np.abs(wts)))
allmax = np.max(maxs)
mymasv = [x/allmax for x in mav]
return mymasv
def cxTwoPointCopy(self, ind1, ind2):
"""Execute a two points crossover with copy on the input individuals. The
copy is required because the slicing in numpy returns a view of the data,
which leads to a self overwriting in the swap operation. It prevents
::
>>> import numpy as np
>>> a = np.array((1,2,3,4))
>>> b = np.array((5,6,7,8))
>>> a[1:3], b[1:3] = b[1:3], a[1:3]
>>> print(a)
[1 6 7 4]
>>> print(b)
[5 6 7 8]
"""
size = len(ind1)
cxpoint1 = random.randint(1, size)
cxpoint2 = random.randint(1, size - 1)
if cxpoint2 >= cxpoint1:
cxpoint2 += 1
else: # Swap the two cx points
cxpoint1, cxpoint2 = cxpoint2, cxpoint1
ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = ind2[cxpoint1:cxpoint2].copy(), ind1[cxpoint1:cxpoint2].copy()
return ind1, ind2
def zero(self):
return 0.0
def smallrandom(self, eps=None):
"""
Produce a small random number in [-eps .. eps].
A random variate in [-1 .. 1] is produced then
multiplied by eps, so the final range is in [-eps .. eps].
"""
if eps is None:
eps = self.alpha
rv = ((2.0 * random.random()) - 1.0) * eps
return rv
def setup(self):
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", np.ndarray, fitness=creator.FitnessMax)
self.toolbox = base.Toolbox()
self.pool = multiprocessing.Pool()
self.toolbox.register("map", self.pool.map)
#toolbox.register("attr_bool", random.randint, 0, 1) # non-numpy non-float version
# self.toolbox.register("attr_float", random.random)
#self.toolbox.register("attr_float", self.zero)
self.toolbox.register("attr_float", self.smallrandom)
self.toolbox.register("individual", tools.initRepeat, creator.Individual, self.toolbox.attr_float, n=self.weights_len)
self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
# self.toolbox.register("evaluate", self.evaluate)
self.toolbox.register("evaluate", self.evaluate)
#toolbox.register("mate", tools.cxTwoPoint) # non-numpy non-float version
self.toolbox.register("mate", self.cxTwoPointCopy)
#toolbox.register("mutate", tools.mutFlipBit, indpb=0.05) # non-numpy non-float version
self.toolbox.register("mutate", tools.mutGaussian, mu=self.mut_center, sigma=self.mut_sigma, indpb=self.mut_indpb)
self.toolbox.register("select", tools.selTournament, tournsize=self.tournsize, k=self.tournk)
def normalize_fitnesses(self, fitnesses):
#print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
maxfitness = np.max([x[0] for x in fitnesses])
#print("maxfitness", maxfitness)
listfit = [x[0] for x in fitnesses]
#print("listfit", listfit)
normfit = [x/maxfitness for x in listfit]
#print("normfit", normfit)
fitnesses = [tuple([x]) for x in normfit]
#print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses])
return fitnesses
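# e.g. normalize_fitnesses([(2.0,), (1.0,), (4.0,)]) -> [(0.5,), (0.25,), (1.0,)]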
def log_it(self, generation):
pool = self.pool
toolbox = self.toolbox
self.pool = None
self.toolbox = None
pklfn = f"{self.logbase}__{generation+1}-{self.maxgenerations}.pkl"
pickle.dump(self, open(pklfn, "wb"))
self.pool = pool
self.toolbox = toolbox
def loop(self):
self.population = self.toolbox.population(n=self.popsize)
#print(self.masv(self.population))
NGEN=self.maxgenerations
for gen in range(NGEN):
print("generation", gen)
offspring = algorithms.varAnd(self.population, self.toolbox, cxpb=self.cxpb, mutpb=self.mtpb)
# print("offspring", offspring)
# constrain genome values to [wmin, wmax] (assign in place: np.clip returns a new array)
for offspring_i, individual in enumerate(offspring):
individual[:] = np.clip(individual, self.wmin, self.wmax)
# print("clipped offspring", offspring)
# Evaluate the individuals with an invalid fitness (not yet evaluated)
# print("check fitness.valid")
invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
# print("invalid_ind", len(invalid_ind))
#print("setting fitness")
fitnesses = self.toolbox.map(self.toolbox.evaluate, invalid_ind)
if self.normalize_fitness:
fitnesses = self.normalize_fitnesses(fitnesses)
"""
#print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
maxfitness = np.max([x[0] for x in fitnesses])
#print("maxfitness", maxfitness)
listfit = [x[0] for x in fitnesses]
#print("listfit", listfit)
normfit = [x/maxfitness for x in listfit]
#print("normfit", normfit)
fitnesses = [tuple([x]) for x in normfit]
#print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses])
"""
print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
self.fitness_dist(fitnesses)
# print("update ind fitness")
for ind, fit in zip(invalid_ind, fitnesses):
ind.fitness.values = fit
#print("selection")
#print("offspring\n", self.masv(offspring))
self.offspring = offspring
self.population = self.toolbox.select(offspring, k=len(self.population))
if 0 == gen % 100:
self.log_it(gen)
#print("population after selection\n", self.masv(self.population))
#print("Report for generation", gen)
self.report()
def report(self):
# post-evolution analysis
fitnesses = self.toolbox.map(self.toolbox.evaluate, self.population)
if self.normalize_fitness:
fitnesses = self.normalize_fitnesses(fitnesses)
self.fitnesses = fitnesses
self.sortedFitnesses = sorted(fitnesses)
self.sortedFitnesses.reverse()
self.fitness_dist(fitnesses)
self.bestFitness, self.worstFitness = self.sortedFitnesses[0], self.sortedFitnesses[-1]
print("best/worst w", self.bestFitness, self.worstFitness)
self.bestGenome = tools.selBest(self.population, k=1)
# print(self.bestGenome)
def ffmt(self, value, fmt="%3.2f"):
return fmt % value
def fitness_dist(self, fitnesses):
listfit = [x[0] for x in fitnesses]
pct05, pct25, pct50, pct75, pct95 = np.percentile(listfit, [5, 25, 50, 75, 95]) # np.percentile expects values in [0, 100]
print(f"fitness dist: {self.ffmt(np.min(listfit))} {self.ffmt(pct05)} {self.ffmt(pct25)} {self.ffmt(pct50)} {self.ffmt(pct75)} {self.ffmt(pct95)} {self.ffmt(np.max(listfit))}")
def driver(self):
# Initialize
self.setup()
# Generation loop
self.loop()
# Report
self.report()
self.log_it(self.maxgenerations)
print(self.masv(self.population))
self.pool.close()
pass
def normalized(a, axis=-1, order=2):
l2 = np.atleast_1d(np.linalg.norm(a, order, axis))
l2[l2==0] = 1
return a / np.expand_dims(l2, axis)
def normalize(v):
if 0 == len(v):
return np.nan
return v/np.linalg.norm(v)
class MinEnv(object):
def __init__(self, wt_len=12, alpha=0.01, w=0.5):
self.alpha = alpha
self.wt_len = wt_len
self.w = w
def get_weights_len(self):
return self.wt_len
def evaluate(self, wts):
mywts = np.array([float(x) for x in wts])
# fitness: spread (std) of the normalized weights; 0.30 is roughly the maximum attainable std for a unit vector, so values near 1.0 are near-optimal
return np.std(normalize(mywts))/0.30,
def test_ew():
env1 = MinEnv()
ew = EvolveWeights(weights_len=env1.get_weights_len(), alpha=env1.alpha, evaluate=env1.evaluate,
popsize=100, maxgenerations=10, tournsize=75, tournk=3, normalize_fitness=False)
ew.driver()
if __name__ == "__main__":
print("ew.py start...")
test_ew()
print("ew.py done.")

code/curio_exp1.py Executable file

@@ -0,0 +1,355 @@
import sys
# allow importing from the 'code/' dir
sys.path.append("../code")
import os
import platform
import pickle
import json
import traceback
import datetime
import copy
import numpy as np # , itertools, copy
import matplotlib.pyplot as plt
from collections import defaultdict
import importlib # module reloading
import environments
import agents
# always forces a reload in case you have edited environments or agents
importlib.reload(environments)
importlib.reload(agents)
#from environments.gridworld import GridWorld
import environments.puzzle as pz
from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace
from agents.q_agent import EvolvableAgent as Agent
# DEAP imports
import random
from deap import creator, base, tools, algorithms
import multiprocessing
#pool = multiprocessing.Pool()
#toolbox.register("map", pool.map)
# Weight handling
from mda import MultiDimArray
# RESS
from ress import RESS
# EvolveWeights
# from ew import EvolveWeights
from curio_evolve_weights import EvolveWeights
# Experiment
from curio_experiment import Experiment
def isotime():
return datetime.datetime.now().isoformat()
def t2fn(timestamp):
timestamp = timestamp.replace('.','_')
timestamp = timestamp.replace(':','_')
return timestamp
class Holder(object):
def __init__(self):
pass
if (1):
unambiguous_puzzle_spec = {
"puzzle_set_description": "Unambiguous puzzle set with 1 good, 1 bad puzzle",
"puzzles": [
{
"puzzle_description": "Appetitive puzzle",
"tt": [[0,0,2], # state 0: first presentation
[0,0,0], # state 1: getting passed over (placeholder)
[2,2,2]], # state 2: consumed (saturating)
"features": [[2], # state 0: Green
[2], # state 1: Green (placeholder)
[0]], # state 2: Empty/Unknown (after being eaten)
"rewards": [-1, # state 0: first look
-1, # state 1: proceeding to next puzzle (placeholder)
1], # state 2: consume (reward)
},
{
"puzzle_description": "Aversive puzzle",
"tt": [[0,0,2], # state 0: first presentation
[0,0,0], # state 1: getting passed over (placeholder)
[2,2,2]], # state 2: consumed (saturating)
"features": [[1], # state 0: Red
[1], # state 1: Red (placeholder)
[0]], # state 2: Empty/Unknown (after being eaten)
"rewards": [-1, # state 0: first look
-1, # state 1: proceeding to next puzzle (placeholder)
-2], # state 2: consume (punishment)
},
]
}
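# Spec schema, as consumed by Puzzle in environments/puzzle.py:
#   tt[state][action] -> next state
#   features[state] -> observable feature vector for that state
#   rewards[state] -> reward granted on entering that state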
ambiguous_puzzle_spec = {
"puzzle_set_description": "Ambiguous puzzle set with 1 good, 1 bad puzzle.",
"puzzles": [
{
"puzzle_description": "Appetitive puzzle",
"tt": [[0,0,2], # state 0: first presentation
[0,0,0], # state 1: getting passed over (placeholder)
[2,2,2]], # state 2: consumed (saturating)
"features": [[1], # state 0: Red
[1], # state 1: Red (placeholder)
[0]], # state 2: Empty/Unknown (after being eaten)
"rewards": [-1, # state 0: first look
-1, # state 1: proceeding to next puzzle (placeholder)
1], # state 2: consume (reward)
},
{
"puzzle_description": "Aversive puzzle",
"tt": [[0,0,2], # state 0: first presentation
[0,0,0], # state 1: getting passed over (placeholder)
[2,2,2]], # state 2: consumed (saturating)
"features": [[1], # state 0: Red
[1], # state 1: Red (placeholder)
[0]], # state 2: Empty/Unknown (after being eaten)
"rewards": [-1, # state 0: first look
-1, # state 1: proceeding to next puzzle (placeholder)
-2], # state 2: consume (punishment)
},
]
}
specdict = {
'unambiguous_puzzle_spec': unambiguous_puzzle_spec,
'ambiguous_puzzle_spec': ambiguous_puzzle_spec,
}
exp_schedule = {
"setlist": [
{
"desc": "Initial puzzle set",
"specs": ['unambiguous_puzzle_spec'],
"turns": 50, # How many turns for 'lifetime learning'
# Needs to be passed to the agent
"num_stimuli": 6, # How many puzzles? Or how many different features?
# Might just be number of 'features' in puzzle spec
# We do not need to manually specify puzzle feature number
"sequence_type": "fixed", #
"probs": [[1.0], [1.0]] #
},
{
"desc": "Stochastic puzzle sets",
"specs": ['unambiguous_puzzle_spec', 'ambiguous_puzzle_spec'],
"turns": 200,
"num_stimuli": 6,
"sequence_type": "stochastic",
"probs": [[1.0, 0.0], [0.0, 1.0]]
},
]
}
def make_puzzle_list(*args, **kwargs):
"""
"""
# Sanity checks
req_params = ['specdict', 'schedule']
paramsvalid = True
for rpi in req_params:
if not rpi in kwargs:
paramsvalid = False
print("make_puzzle_list missing", rpi)
assert paramsvalid, "Error: missing a required parameter. Quitting."
specdict = kwargs['specdict']
schedule = kwargs['schedule']
puzzles = []
upress = RESS() # Random Equal Stimulus Sets instance
for seti in schedule['setlist']:
num_sets = len(seti['specs'])
num_stimuli = seti['num_stimuli']
num_turns = seti['turns']
seqtype = seti['sequence_type']
probs = seti['probs']
if 1 == num_sets:
# simple case: repeat the single puzzle spec num_stimuli times (stub)
pass
else:
# multiple sets: build a sequence per sequence_type and probs (stub, not yet implemented)
pass
def exp1_environment(*args, **kwargs):
unambiguous_puzzle_spec = {
"puzzle_set_description": "Unambiguous puzzle set with 1 good, 1 bad puzzle",
"puzzles": [
{
"puzzle_description": "Appetitive puzzle",
"tt": [[0,0,2], # state 0: first presentation
[0,0,0], # state 1: getting passed over (placeholder)
[2,2,2]], # state 2: consumed (saturating)
"features": [[2], # state 0: Green
[2], # state 1: Green (placeholder)
[0]], # state 2: Empty/Unknown (after being eaten)
"rewards": [-1, # state 0: first look
-1, # state 1: proceeding to next puzzle (placeholder)
1], # state 2: consume (reward)
},
{
"puzzle_description": "Aversive puzzle",
"tt": [[0,0,2], # state 0: first presentation
[0,0,0], # state 1: getting passed over (placeholder)
[2,2,2]], # state 2: consumed (saturating)
"features": [[1], # state 0: Red
[1], # state 1: Red (placeholder)
[0]], # state 2: Empty/Unknown (after being eaten)
"rewards": [-1, # state 0: first look
-1, # state 1: proceeding to next puzzle (placeholder)
-2], # state 2: consume (punishment)
},
]
}
ambiguous_puzzle_spec = {
"puzzle_set_description": "Ambiguous puzzle set with 1 good, 1 bad puzzle.",
"puzzles": [
{
"puzzle_description": "Appetitive puzzle",
"tt": [[0,0,2], # state 0: first presentation
[0,0,0], # state 1: getting passed over (placeholder)
[2,2,2]], # state 2: consumed (saturating)
"features": [[1], # state 0: Red
[1], # state 1: Red (placeholder)
[0]], # state 2: Empty/Unknown (after being eaten)
"rewards": [-1, # state 0: first look
-1, # state 1: proceeding to next puzzle (placeholder)
1], # state 2: consume (reward)
},
{
"puzzle_description": "Aversive puzzle",
"tt": [[0,0,2], # state 0: first presentation
[0,0,0], # state 1: getting passed over (placeholder)
[2,2,2]], # state 2: consumed (saturating)
"features": [[1], # state 0: Red
[1], # state 1: Red (placeholder)
[0]], # state 2: Empty/Unknown (after being eaten)
"rewards": [-1, # state 0: first look
-1, # state 1: proceeding to next puzzle (placeholder)
-2], # state 2: consume (punishment)
},
]
}
# Notion: Have an object to define a schedule of presentation of
# environments, with the ability to stochastically present one of
# a list of environments.
exp_schedule = {
"setlist": [
{
"desc": "Initial puzzle set",
"specs": ['unambiguous_puzzle_spec'],
"turns": 50,
"num_stimuli": 6,
"sequence_type": "fixed",
"probs": [[1.0], [1.0]]
},
{
"desc": "Stochastic puzzle sets",
"specs": ['unambiguous_puzzle_spec', 'ambiguous_puzzle_spec'],
"turns": 200,
"num_stimuli": 6,
"sequence_type": "stochastic",
"probs": [[1.0, 0.0], [0.0, 1.0]]
},
]
}
if 'num_puzzles_on_belt' in kwargs:
num_puzzles_on_belt = kwargs['num_puzzles_on_belt']
else:
num_puzzles_on_belt = 6
spec = unambiguous_puzzle_spec # local name, to avoid shadowing the environments.puzzle module imported as pz
if (1):
maxrewards = [1]
# Produce Gellermann sequence
upress = RESS()
print(dir(upress))
print(spec['puzzles'])
print(len(spec['puzzles']))
upseries = upress.newress(num_puzzles_on_belt, len(spec['puzzles']))
print("upseries", upseries)
# Create puzzle sequence
# call to make_puzzle_list goes about here
# Instantiate puzzles per Gellermann sequence
puzzles = []
for stimi in upseries:
stimn = int(stimi)
myp = Puzzle(tt=np.array(spec['puzzles'][stimn]['tt']),
features=spec['puzzles'][stimn]['features'],
rewards=spec['puzzles'][stimn]['rewards']
)
puzzles.append(myp)
# Create conveyor belt
world = ConvBelt(actionSpace = getActionSpace(puzzles),
observationSpace = getObservationSpace(puzzles),
maxRewards = maxrewards,
agentclass=Agent,
randomize = False, alpha=0.005)
# Add puzzles
for pi in puzzles:
world.append(pi)
return world
def do_experiment():
# Experiment instance
print('creating myexp')
myexp = Experiment()
print('setting agentclass')
myexp.set_agentclass(Agent)
print('setting environclass')
myexp.set_environclass(ConvBelt)
print('setting evolverclass')
myexp.set_evolverclass(EvolveWeights)
print('setting evolver_attributes')
myexp.set_evolver_attributes() # defaults
print('setting environ_maker')
myexp.set_environ_maker(exp1_environment) # sets function
print('making environment')
myexp.make_environ() # Calls function
print('making evolver_instance')
myexp.make_evolver_instance()
if myexp.validate():
print('running driver')
myexp.evolver.driver()
else:
print("Experiment failed to validate.")
if __name__ == "__main__":
print("exp1.py start...")
do_experiment()
print("exp1.py done.")

code/curio_experiment.py Executable file

@@ -0,0 +1,192 @@
"""
curio_experiment.py
Curiosity project Experiment class definition.
Aim for better encapsulation.
Experiment class
- This class should get the various classes to use in running an experiment
- EvolveWeights
- mda?
- Environ (GridWorld, ConvBelt, Puzzle)
- Still is going to require ad hoc function to create the particular Environ
- But could pass in function to use
- Agentclass
- And experimental attributes
- For example
- Experiment constructs EW instance, passes in weight length
- Experiment constructs Environ instance
- Experiment requests evolution run of EW with parameters
- EW calls Experiment for each evaluation of an individual (and in what generation)
- Experiment calls Environ.evaluate with individual weights, agentclass
- Passes w, tuple back to EW
"""
import sys
import os
import traceback
class Holder(object):
def __init__(self):
pass
class Experiment(object):
"""
Experiment class. Instances will drive reinforcement learning experiments.
"""
def __init__(self):
self.agentclass = None
self.environclass = None
self.evolverclass = None
self.environmaker = None
pass
def validate(self):
valid = True
# Test that we have classes to use
valid = valid and (self.agentclass is not None)
valid = valid and (self.environclass is not None)
valid = valid and (self.evolverclass is not None)
# Test other values here
return valid
def set_schedule(self, schedule):
self.schedule = schedule
def set_environ_maker(self, environmaker):
self.environmaker = environmaker
def make_environ(self):
if self.environmaker is not None:
try:
self.environ = self.environmaker()
except Exception:
estr = f"Error: {traceback.format_exc()}"
print(estr)
self.environ = None
assert 0, "Creating environment failed. Quitting."
def set_agentclass(self, agentclass):
# Test class for compatibility
okclass = True
# No test yet
if okclass:
self.agentclass = agentclass
def get_agentclass(self):
return self.agentclass
def set_environclass(self, environclass):
# Test class for compatibility
okclass = True
if not 'evaluate' in dir(environclass):
okclass = False
print("set_environclass error: class does not provide 'evaluate'")
if okclass:
self.environclass = environclass
def get_environclass(self):
return self.environclass
def set_evolverclass(self, evolverclass):
# Test class for compatibility
okclass = True
if not 'driver' in dir(evolverclass):
okclass = False
print("set_evolverclass error: class does not provide 'driver'")
if okclass:
self.evolverclass = evolverclass
def set_agent_attributes(self, alpha=0.005):
self.agent_props = Holder()
self.agent_props.alpha = alpha
def set_evolver_attributes(self,
popsize=100,
maxgenerations=10000,
cxpb=0.5,
mtpb=0.05,
wmin=-20.0,
wmax=20.0,
mut_center=0.0,
mut_sigma=0.1,
mut_indpb=0.05,
tournsize=5,
tournk=2,
normalize_fitness=True,
tag='environ'
):
self.evolver_props = Holder()
self.evolver_props.popsize = popsize
self.evolver_props.maxgenerations = maxgenerations
self.evolver_props.cxpb = cxpb
self.evolver_props.mtpb = mtpb
self.evolver_props.wmin = wmin
self.evolver_props.wmax = wmax
self.evolver_props.mut_center = mut_center
self.evolver_props.mut_sigma = mut_sigma
self.evolver_props.mut_indpb = mut_indpb
self.evolver_props.tournsize = tournsize
self.evolver_props.tournk = tournk
self.evolver_props.normalize_fitness = normalize_fitness
self.evolver_props.tag = tag
def make_evolver_instance(self):
self.evolver = self.evolverclass(
# self.environclass,
# weights_len
weights_len=self.environ.get_weights_len(),
# alpha
alpha=self.environ.alpha,
# evaluate function
evaluate=self.environ.evaluate,
popsize=self.evolver_props.popsize,
maxgenerations=self.evolver_props.maxgenerations,
cxpb=self.evolver_props.cxpb,
mtpb=self.evolver_props.mtpb,
wmin=self.evolver_props.wmin,
wmax=self.evolver_props.wmax,
mut_center= self.evolver_props.mut_center,
mut_sigma= self.evolver_props.mut_sigma,
mut_indpb= self.evolver_props.mut_indpb,
tournsize= self.evolver_props.tournsize,
tournk= self.evolver_props.tournk,
normalize_fitness= self.evolver_props.normalize_fitness,
tag= self.evolver_props.tag
)
def set_env_attributes(self):
self.env_props = Holder()
def handle_evaluation(self, ind, generation):
"""
evolver calls this to get an evaluation of an
individual.
Depending on the experiment schedule and generation,
this may require constructing a new environment.
"""
pass
def run_experiment(self):
"""
# Run experiment
ew = EvolveWeights(world,
popsize=100,
maxgenerations=1000,
tournsize=75,
tournk=3,
normalize_fitness=False)
ew.driver()
"""

code/environments/gridworld.py Normal file

@@ -0,0 +1,93 @@
# custom version of openAI's gridworld
# to support arbitrary holes
from typing import Tuple, List, Any
class GridWorld:
def __init__(self,dims,startState=[0,0]):
self.height = dims[0]
self.width = dims[1]
self.startState = startState
self.state = self.startState[:]
self.holes = []
self.goals = []
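# Typical setup (a sketch): a 4x4 grid with one goal and one hole:
#   env = GridWorld((4, 4))
#   env.goals.append([3, 3]); env.holes.append([1, 2])
#   state = env.reset()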
def reset(self):
'''returns an initial observation while also resetting the environment'''
self.state = self.startState[:]
return self.state
def step(self,action) -> Tuple[Tuple[int], float, bool, Any]:
delta = [0,0]
if (action == 0): delta[0] = -1 # up (row - 1)
elif (action == 2): delta[0] = 1 # down (row + 1)
elif (action == 1): delta[1] = 1 # right (col + 1)
else: delta[1] = -1 # left (col - 1)
newstate = [self.state[0]+delta[0], self.state[1]+delta[1]]
newstate[0] = min(max(0,newstate[0]),self.height-1)
newstate[1] = min(max(0,newstate[1]),self.width-1)
self.state = newstate
# set default returns
reward = -1.0
goalFound = False
# check for goal
if self.state in self.goals:
goalFound = True
reward = 0.0
elif self.state in self.holes:
reward = -10.0
# openAIgym format: (state, reward, goalAchieved, DebugVisInfo)
return (self.state, reward, goalFound, None)
def render(env,brain):
# renders a gridworld environment
# and plots the agent's path
import numpy as np
import matplotlib.pyplot as plt
path = []
brain.reset() # Warning!!: NOT MABE-reset(), but soft-reset() (keep weights)
nextState = env.reset()
dims = [env.height, env.width, 4]
path.append(nextState)
time = 0
while True:
time += 1
brain.sensoryState = nextState # SET INPUTS
brain.plasticUpdate()
nextState, reward, goal_achieved, _ = env.step(brain.action) # GET OUTPUTS
path.append(nextState)
if goal_achieved or time == 100: break
brain.reward = reward
y,x = zip(*path)
x,y = (np.array(x)+0.5, np.array(y)+0.5)
# setup figure
plt.figure(figsize=(dims[1],dims[0]))
# plot landmarks
hasGoals = False
goals = []
hasHoles = False
holes = []
try: goals = env.goals
except AttributeError: pass
else: hasGoals = True
try: holes = env.holes
except AttributeError: pass
else: hasHoles = True
if hasGoals:
for goal in goals:
newrec = plt.Rectangle((goal[1], goal[0]), 1, 1, color='green', edgecolor=None, linewidth=2.5, alpha=0.7)
plt.gca().add_patch(newrec)
if hasHoles:
for hole in holes:
newrec = plt.Rectangle((hole[1], hole[0]), 1, 1, color='orange', edgecolor=None, linewidth=2.5, alpha=0.7)
plt.gca().add_patch(newrec)
plt.plot(x,y,color='gray')
plt.scatter(x[0],y[0],s=64,color='green')
plt.scatter(x[-1],y[-1],s=64,color='red')
plt.grid(linestyle='--')
plt.ylim([0,dims[0]])
plt.xlim([0,dims[1]])
plt.gca().set_yticks(list(range(dims[0])))
plt.gca().set_xticks(list(range(dims[1])))
plt.gca().invert_yaxis()
# print out location history
print(' '.join([str(x)+','+str(y) for x,y in path]))

code/environments/puzzle.py Normal file

@@ -0,0 +1,494 @@
"""
puzzle.py
"""
import numpy as np, itertools
from random import shuffle
from typing import List, Tuple, Union, Any
import copy
#import gym, gym_gridworlds # if using other environments
# overridden in agent.py, typically due to load order
LOGGING = True
import logging, sys
logging.basicConfig(stream=sys.stdout,level=logging.INFO)
log = logging.getLogger()
if not LOGGING:
# remove all logging functionality
for handler in log.handlers.copy():
try:
log.removeHandler(handler)
except ValueError: # in case another thread has already removed it
pass
log.addHandler(logging.NullHandler())
log.propagate = False
class Puzzle:
__slots__ = [
'tt',
'features',
'rewards',
'state',
'initialState',
'solved',
'solvable',
'maxrewards',
'originalrewards']
def __init__(self, tt:List[List[int]], features:List[int], rewards:List[float], initialState:int = 0):
self.tt = tt
self.features = features
self.rewards = rewards[:]
self.originalrewards = rewards
self.state = 0
self.initialState = initialState
self.solved = False
def __str__(self) -> str:
output = ""
output += "transition table:\n"
for row in self.tt:
output += f" {str(row)}\n"
output += f"solved: {self.solved}\n"
output += f"state: {self.state}\n"
output += f"features: {self.features}\n"
output += f"rewards: {self.rewards}\n"
return output
def reset(self):
'''must be called before first use'''
self.solved = False
self.state = self.initialState
self.rewards = self.originalrewards[:]
def setMaxRewards(self, maxRewards):
'''typically used by the ConvBelt class before reset()'''
self.maxrewards = set(self.rewards) & set(maxRewards)
self.solvable = bool(self.maxrewards)
def transition(self,action:int) -> Tuple[float, List[int], bool]:
self.state = self.tt[self.state][action]
finished = False
reward = self.rewards[self.state]
if self.rewards[self.state] in self.maxrewards:
self.rewards[self.state] = -1 # 'eat' the food and replace with empty reward
finished = True
self.solved = True
return (reward, self.features[self.state], finished)
def getFeatures(self) -> List[int]:
'''returns only the current observable features of the puzzle'''
return self.features[self.state]
def Action(index:Union[int,str]) -> Union[str,int]:
''' action str <-> int Action('pass')->1 Action(1)->'pass' '''
if isinstance(index, (int,np.int64)):
return ('idle','pass','investigate','eat')[index]
return {'idle':0,'pass':1,'investigate':2,'eat':3}[index]
class ConvBelt:
"""
__slots__ = [
'puzzles', # (list[Puzzle]) - list of puzzles, use append()
'pi', # (int) - currently selected puzzle / "puzzle index"
'puzzle', # (ref:Puzzle) - shortcut for self.puzzles[pi]
'randomize', # (bool) - shuffling of puzzles between trials
'maxrewards', # (list[float]) - the maximum achievable rewards
'action_space', # (tuple[int]) - number of actions available to agents, usually (4,)
'observation_space', # (tuple[int]) - features/dimensions given to agents (dim1 size, dim2 size...)
'puzzlesLeftToComplete', # (int) - faster tracking of how many are left, when 0 set self.solved
'solved', # (bool) - state flag for all puzzles solved (trial can be over)
'agentclass',
'killed_reward',
'max_training_trials',
'max_steps',
'alpha',
'gamma',
'epsilon',
'lmbda',
#'get_weights_len',
#'reset',
#'extend',
#'clear',
]
"""
def __init__(self,actionSpace,observationSpace,maxRewards, agentclass,
killed_reward=-10.0, max_training_trials=50, max_steps=32,
alpha=0.01, gamma=0.95, epsilon=0.01, lmbda=0.42, randomize=False):
'''please provide entire actionSpace, observationSpace, maxRewards for all puzzles
even those later added this environment'''
self.puzzles = []
self.pi = 0
self.puzzle = None
self.randomize = randomize
self.action_space = actionSpace
self.observation_space = observationSpace
self.maxrewards = maxRewards
self.puzzlesLeftToComplete = 0
self.solved = False
self.agentclass = agentclass
self.killed_reward = killed_reward
self.max_training_trials = max_training_trials
self.max_steps = max_steps
self.alpha = alpha
self.gamma = gamma
self.epsilon = epsilon
self.lmbda = lmbda
print(self.get_weights_len())
def get_weights_len(self):
"""
Return the length of weights needed for an agent.
"""
print("in ConvBelt.get_weights_len")
mywl = np.prod(tuple(self.observation_space) + tuple(self.action_space))
return mywl
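# e.g. an observation_space of (4, 5) with an action_space of (4,)
# gives np.prod((4, 5, 4)) = 80 weights (one per state-action feature)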
def reset(self):
'''returns an initial observation while also resetting the environment'''
log.info("resetting all puzzles")
self.puzzlesLeftToComplete = 0
for puzzle in self.puzzles:
puzzle.reset()
if puzzle.solvable:
self.puzzlesLeftToComplete += 1
self.solved = not bool(self.puzzlesLeftToComplete)
if self.randomize: shuffle(self.puzzles)
self.pi = 0
if len(self.puzzles) == 0:
raise Exception("Please add puzzles to the belt/env first using append() or extend()")
self.puzzle = self.puzzles[self.pi]
return self.puzzle.getFeatures()
def append(self, newPuzzle:Puzzle):
log.info("adding new puzzle")
newPuzzle.setMaxRewards(self.maxrewards)
newPuzzle.reset()
if newPuzzle.solvable:
self.puzzlesLeftToComplete += 1
self.solved = False
self.puzzles.append(newPuzzle)
if self.puzzle is None:
self.reset()
def extend(self, newPuzzles:List[Puzzle]):
log.info(f"adding {len(newPuzzles)} new puzzles")
oldLength = len(self.puzzles)
self.puzzles.extend(newPuzzles)
newLength = len(self.puzzles)
for puzzle_i in range(oldLength, newLength):
puzzle = self.puzzles[puzzle_i]
puzzle.setMaxRewards(self.maxrewards)
puzzle.reset()
if puzzle.solvable:
self.puzzlesLeftToComplete += 1
self.solved = False
if self.puzzle is None:
self.reset()
def _post_removal(self):
if len(self.puzzles) == 0:
self.puzzle = None
log.info("puzzles list now empty")
if self.pi >= len(self.puzzles)-1:
self.pi = 0
log.info("resetting index to 0")
def clear(self):
'''clears the belt of puzzles'''
self.puzzles.clear()
log.info("removed ALL puzzles")
self.puzzlesLeftToComplete = 0
self._post_removal()
def remove(self, puzzle):
'''removes puzzle from belt of puzzles'''
if puzzle.solvable:
self.puzzlesLeftToComplete -= 1
self.puzzles.remove(puzzle)
log.info("removed puzzle")
self._post_removal()
def pop(self, index=None):
'''removes and returns puzzle at index (or from the end)'''
if index is None:
index = -1
puzzle = self.puzzles.pop(index)
if puzzle.solvable:
self.puzzlesLeftToComplete -= 1
log.info(f"popped puzzle at index {index}")
self._post_removal()
return puzzle
def _completed_a_puzzle(self):
self.puzzlesLeftToComplete -= 1
log.info(f"completed a puzzle - {self.puzzlesLeftToComplete} solvable puzzles remain")
if self.puzzlesLeftToComplete == 0:
self.solved = True
log.info(f"all puzzles completed - trial complete")
def step(self, action:int) -> Tuple[List[int], float, bool, Any]: # returns (state,reward,goal,_) (gym format)
if action == 1: # pass (change to next puzzle, and change no puzzle's state)
self.pi = (self.pi + 1) % len(self.puzzles)
# reports states of old and new puzzles instead of a transition
log.info(f"(puzzle-step) action {action} ({Action(action)}) from old puzzle state {self.puzzle.state} to new puzzle state {self.puzzles[self.pi].state}")
self.puzzle = self.puzzles[self.pi]
return (self.puzzle.features[self.puzzle.state], # features
-1, # reward of a pass
#self.puzzle.rewards[self.puzzle.state], # reward
self.solved, # done-flag
None) # DebugVisInfo
else:
log.info(f"(puzzle-step) action {action} ({Action(action)}) from state {self.puzzle.state} to {self.puzzle.tt[self.puzzle.state][action]}")
reward, features, puzzle_just_finished = self.puzzle.transition(action)
if puzzle_just_finished:
self._completed_a_puzzle()
return (features, reward, self.solved, None)
def render(self, env, brain):
# renders a puzzlebox environment
import numpy as np
import matplotlib.pyplot as plt
actions = []
rewards = []
states = []
brain.reset() # Warning!!: NOT MABE-reset(), but soft-reset() (keep weights)
nextState = env.reset()
states.append(nextState)
actions.append(0) # path is recording actions in this visualization
rewards.append(-1)
time = 0
print(env.puzzlesLeftToComplete)
while True:
time += 1
brain.sensoryState = nextState # SET INPUTS
brain.plasticUpdate()
nextState, reward, goal_achieved, _ = env.step(brain.action) # GET OUTPUTS
actions.append(brain.action)
rewards.append(reward)
states.append(nextState)
if env.puzzlesLeftToComplete == 0 or time == 600: break
#if goal_achieved or time == 100: break
brain.reward = reward
print(actions)
print(states)
plt.figure()
plt.plot(actions)
plt.scatter(list(range(len(actions))),actions)
plt.figure()
plt.plot(rewards)
plt.scatter(list(range(len(rewards))),rewards)
def evaluate(self, ind,
num_trials=200,
n_actions=4,
HARD_TIME_LIMIT=600):
"""
Given an individual agent's weights, evaluate it and
return its fitness.
"""
w = 0.0
# Need to refactor the following code taken from the
# Jupyter notebook.
# domain-specific settings
#num_trials=200
#n_actions = 4
#(optimal lmbda in the agent is domain dependent - could be evolved)
#HARD_TIME_LIMIT = 600
#KILLED_REWARD = -10 # not used here
#(standard reward) = -1.0 (means agent is potentially wasting time - set internal to agent code)
#(goal reward) = 1.0 (means the agent achieved something good - set internal to agent code)
# alpha # how much to weigh reward surprises that deviate from expectation
# gamma # how important expected rewards will be
# epsilon # fraction of exploration to exploitation (how often to choose a random action)
# lmbda # how slowly memory of preceding actions fades away (1=never, 0=immediately)
agent = self.agentclass(obsSpace=self.observation_space, actSpace=self.action_space, alpha=self.alpha,
gamma=self.gamma, epsilon=self.epsilon, lmbda=self.lmbda)
# Put weights in the Agent
agent.weights = np.array([float(x) for x in ind]) # must be a numpy array for the vectorized updates in Agent
time_to_solve_each_trial = []
rewards = []
for trialN in range(self.max_training_trials):
# some output to see it running
if (trialN % 10) == 0: print('.',end='')
# initialize the agent, environment, and time for this trial
agent.reset() # soft-reset() (keeps learned weights)
nextState = self.reset()
time = 0
while True:
time += 1
# set agent senses based on environment and allow agent to determine an action
agent.sensoryState = nextState
agent.plasticUpdate()
# determine effect on environment state & any reward (in standard openAI-gym API format)
nextState, reward, goal_achieved, _ = self.step(agent.action)
agent.reward = reward
if self.puzzlesLeftToComplete == 0 or time == self.max_steps:
agent.plasticUpdate()
break
# could have deadly rewards that stop the trial early
#elif reward <= -10:
# agent.sensoryState = nextState
# agent.reward = reward
# agent.plasticUpdate()
# agent.reset()
# nextState = self.reset()
rewards.append(reward)
time_to_solve_each_trial.append(time)
# Calculate fitness
# Rewards are in [-1 .. 1], have to rescale to [0 .. 1]
#scalerewards = (np.array(rewards) * 0.5) + 0.5
#w = np.mean(scalerewards)
w = sum(rewards)
return w,
def getObservationSpace(*items) -> Tuple[int]:
'''Returns total features dimensions over all puzzles, starting from 0.
Given 1 or more puzzles, finds union of observation space (features).
then returns the size of that space.
Ensures all puzzles have same feature dimensions, errors if not.
Useful when setting up a RL state space for certain feature sizes.
[3,1] would have dimensions [4,2], and [[0,2],[0,1]] would be [1,3]
>>> p1 = Puzzle(tt=[[]], rewards=[], features=[[0,1],[0,1],[3,1]])
>>> getObservationSpace(p1)
(4, 2)
>>> p2 = Puzzle(tt=[[]], rewards=[], features=[[1,1],[1,1],[2,4]])
>>> getObservationSpace(p2)
(3, 5)
>>> getObservationSpace(p1,p2)
(4, 5)
>>> puzzles = [p1,p2]
>>> getObservationSpace(puzzles)
(4, 5)
'''
if type(items) is tuple and isinstance(items[0], Puzzle):
# perform union (max) over feature space of all items
highest = copy.copy(items[0].features[0]) # features is [[int,int,...],...]
featurelen = len(highest)
for puzzle in items:
for featureset in puzzle.features:
if len(featureset) != featurelen:
raise Exception("not all features have the same length")
for feature_i in range(len(featureset)):
highest[feature_i] = max(highest[feature_i],featureset[feature_i])
return tuple((e+1 for e in highest)) # size is 1+highest due to 0-indexing of features
elif type(items) is tuple and type(items[0]) in (tuple,list):
return getObservationSpace(*items[0]) # unpack one layer
else:
raise Exception(f"Expected type of Puzzle(s), but got {type(items)}")
def getActionSpace(*items) -> Tuple[int]:
'''Returns total action dimensions over all puzzles, (num columns in tt).
Given 1 or more puzzles.
Ensures all puzzles have same dimensions, errors if not.
Useful when setting up a RL state space for certain action sizes.
>>> p1 = Puzzle(tt=[[0,0],[4,2]], rewards=[], features=[[]])
>>> getActionSpace(p1)
(2,)
>>> p2 = Puzzle(tt=[[0,0,1],[1,1,2]], rewards=[], features=[[]])
>>> getActionSpace(p2)
(3,)
>>> getActionSpace(p1,p2)
Traceback (most recent call last):
...
Exception: not all puzzles (rows) have the same tt col size
'''
if type(items) is tuple and isinstance(items[0], Puzzle):
# perform union (max) over feature space of all items
nrows, ncols = len(items[0].tt), len(items[0].tt[0])
for puzzle in items:
prows = len(puzzle.tt)
if prows != nrows:
raise Exception("not all puzzles have the same tt row size")
samerows = [len(c) == ncols for c in puzzle.tt]
if not all(samerows):
raise Exception("not all puzzles (rows) have the same tt col size")
return (ncols,)
elif type(items) is tuple and type(items[0]) in (tuple,list):
return getActionSpace(*items[0]) # unpack one layer
else:
raise Exception(f"Expected type of Puzzle(s), but got {type(items)}")
def _test_world():
'''full test of the conveyorbelt world
>>> import copy
>>> maxrewards = [1]
>>> easy_features = [[0,1],[0,1],[3,1],[0,0]]
>>> easy_rewards = [-1,-1,-1,1]
>>> easy_tt = np.array([[0,0,2,3], [0,0,0,0], [2,0,2,3], [3,3,3,3]])
>>> p1 = Puzzle(tt=easy_tt, features=easy_features, rewards=easy_rewards)
>>> p2 = copy.deepcopy(p1)
>>> puzzles = (p1,p2)
>>> world = ConvBelt(actionSpace = getActionSpace(puzzles), observationSpace = getObservationSpace(puzzles), maxRewards = maxrewards, agentclass = None, randomize = False)
>>> world.append(p1)
>>> world.append(p2)
>>> # trial 1 (step() returns gym-format: (features, reward, done, debug))
>>> world.reset() # reset before first use just to be sure
[0, 1]
>>> world.step(Action('investigate'))
([3, 1], -1, False, None)
>>> world.step(Action('pass'))
([0, 1], -1, False, None)
>>> world.step(Action('eat'))
([0, 0], 1, False, None)
>>> world.step(Action('pass'))
([3, 1], -1, False, None)
>>> world.step(Action('eat'))
([0, 0], 1, True, None)
>>> world.step(Action('eat')) # try eating again, notice reward change
([0, 0], -1, True, None)
>>> # trial 2
>>> world.reset()
[0, 1]
>>> world.step(Action('investigate'))
([3, 1], -1, False, None)
>>> world.step(Action('pass'))
([0, 1], -1, False, None)
>>> world.step(Action('eat'))
([0, 0], 1, False, None)
>>> world.step(Action('pass'))
([3, 1], -1, False, None)
>>> world.step(Action('eat'))
([0, 0], 1, True, None)
'''
if __name__ == '__main__':
'''test important functions and workflows with doctesting
run this python file by itself to run these tests, and set
LOGGING=True near top of file.'''
import doctest
from functools import partial
test = partial(doctest.run_docstring_examples, globs = globals())
test(getObservationSpace)
test(getActionSpace)
test(_test_world)

code/evolve.py Normal file

@@ -0,0 +1,76 @@
import random
from deap import creator, base, tools, algorithms
import numpy as np
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", np.ndarray, fitness=creator.FitnessMax)
toolbox = base.Toolbox()
#toolbox.register("attr_bool", random.randint, 0, 1) # non-numpy non-float version
toolbox.register("attr_float", random.random)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, n=100)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
def linearFitness(individual):
'''selection pressure for genome values to be numpy.arange(start=0.0, stop=1.0, step=1/len(genome))'''
import numpy as np
a = np.arange(0, 1, 1.0/len(individual))
b = np.array(individual)
return 1.0-np.sum(np.abs(a-b))/(len(individual)*0.5),
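# Sanity check (illustrative): a genome exactly matching the target ramp scores
# the maximum fitness, e.g. linearFitness(np.arange(0, 1, 0.01)) -> (1.0,)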
def cxTwoPointCopy(ind1, ind2):
"""Execute a two points crossover with copy on the input individuals. The
copy is required because the slicing in numpy returns a view of the data,
which leads to a self overwriting in the swap operation. It prevents
::
>>> import numpy as np
>>> a = np.array((1,2,3,4))
>>> b = np.array((5,6,7,8))
>>> a[1:3], b[1:3] = b[1:3], a[1:3]
>>> print(a)
[1 6 7 4]
>>> print(b)
[5 6 7 8]
"""
size = len(ind1)
cxpoint1 = random.randint(1, size)
cxpoint2 = random.randint(1, size - 1)
if cxpoint2 >= cxpoint1:
cxpoint2 += 1
else: # Swap the two cx points
cxpoint1, cxpoint2 = cxpoint2, cxpoint1
ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = ind2[cxpoint1:cxpoint2].copy(), ind1[cxpoint1:cxpoint2].copy()
return ind1, ind2
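# For contrast with the failure shown in the docstring, copying the slices
# before the swap yields the intended exchange (illustrative):
#   a = np.array((1, 2, 3, 4)); b = np.array((5, 6, 7, 8))
#   a[1:3], b[1:3] = b[1:3].copy(), a[1:3].copy()
#   # a -> [1 6 7 4], b -> [5 2 3 8]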
toolbox.register("evaluate", linearFitness)
#toolbox.register("mate", tools.cxTwoPoint) # non-numpy non-float version
toolbox.register("mate", cxTwoPointCopy)
#toolbox.register("mutate", tools.mutFlipBit, indpb=0.05) # non-numpy non-float version
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=0.2, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)
# evolution loop
population = toolbox.population(n=100)
NGEN=500
for gen in range(NGEN):
offspring = algorithms.varAnd(population, toolbox, cxpb=0.5, mutpb=0.1)
    # constrain genome values to [0, 1] (assign back in place; np.clip alone
    # returns a new array and would leave the offspring unchanged)
    for offspring_i, individual in enumerate(offspring):
        offspring[offspring_i][:] = np.clip(individual, 0.0, 1.0)
# Evaluate the individuals with an invalid fitness (not yet evaluated)
invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
for ind, fit in zip(invalid_ind, fitnesses):
ind.fitness.values = fit
population = toolbox.select(offspring, k=len(population))
# post-evolution analysis
fitnesses = toolbox.map(toolbox.evaluate, population)
sortedFitnesses = sorted(fitnesses)
bestFitness, worstFitness = sortedFitnesses[-1], sortedFitnesses[0]  # sorted ascending, so best is last
print(bestFitness, worstFitness)
bestGenome = tools.selBest(population, k=1)
print(bestGenome)

333
code/exp1.py Executable file

@@ -0,0 +1,333 @@
"""
exp1.py - instance of use of 'experiment.py'
Tasks:
- Consider how to have a changing schedule of stimulus presentation
Need to have something where we can see evolution producing a trait that
would indicate interest in new things in the environment. Sets up conditions
where curiosity could be advantageous.
Conveyor belt needs to have the ability to introduce new things.
Single factor shift to start -- color of the thing ?
The introduction of novelty is the main thing, where the novelty is
associated with fitness advantage.
Simple systems to test
- constant environment
- switch between two different environments
- frequency of shift makes a difference
- Goldilocks zone for intermediate frequency
Controlled randomization
- Known low-payoff 'food' in environment
- Better thing has a cue
- Changing frequency of presentation
- Constant
- Ramp
- Cycle
- 'Green' could indicate better but
- x factor for better could be changed
For all of these, we can test unseen (novel) stimuli
- Generalization can be tested
- Cue of goodness
- Proportion of time novel stimulus are rewarding
- Must be a proportion to introduce unpredictability
One hypothesis: unpredictability between cues and rewards may lead to curiosity
- Evolutionary timescale of unpredictability
- Predictable lifetime
Push current code to repository.
"""
import sys
# allow importing from the 'code/' dir
sys.path.append("../code")
import os
import platform
import pickle
import json
import traceback
import datetime
import copy
import numpy as np # , itertools, copy
import matplotlib.pyplot as plt
from collections import defaultdict
import importlib # module reloading
import environments
import agents
# always forces a reload in case you have edited environments or agents
importlib.reload(environments)
importlib.reload(agents)
#from environments.gridworld import GridWorld
import environments.puzzle as pz
from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace
from agents.q_agent import EvolvableAgent as Agent
# DEAP imports
import random
from deap import creator, base, tools, algorithms
import multiprocessing
#pool = multiprocessing.Pool()
#toolbox.register("map", pool.map)
# Weight handling
from mda import MultiDimArray
# RESS
from ress import RESS
# EvolveWeights
# from ew import EvolveWeights
from curio_evolve_weights import EvolveWeights
# Experiment
from experiment import Experiment
def isotime():
return datetime.datetime.now().isoformat()
def t2fn(timestamp):
timestamp = timestamp.replace('.','_')
timestamp = timestamp.replace(':','_')
return timestamp
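# e.g. t2fn('2025-09-20T14:53:13.123456') -> '2025-09-20T14_53_13_123456'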
class Holder(object):
"""
A general class for the equivalent of a digital duffle bag, each instance
can have essentially whatever you want stuffed into it.
This is essentially the very opposite of defining classes with the
__slots__ convention, leaving the contents entirely open.
I've found this useful for making context objects. If I am careful,
the whole object can be serialized to disk and loaded later.
"""
def __init__(self):
pass
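# Example use of Holder (illustrative):
#   ctx = Holder()
#   ctx.alpha = 0.005          # stash whatever the experiment context needs
#   ctx.note = "anything goes"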
"""
Probability of reward at all
Probability of strength of reward
Variances:
- How many puzzle cues do we have?
- How often does a puzzle appear in training?
- How often does a puzzle appear across evolutionary time?
- How much reward does solving a puzzle deliver?
Two things: green | red
green good
red bad
Outcomes
- Too unlikely -> no behavior to examine
- Entirely predictable
- In between -> curiosity has advantage
First sample from uniform distribution to determine reward (0.5)
Second: strength of reward in conjunction with probability of reward (small freq but large reward, etc.)
Spot or range where it becomes advantageous to evolve a curiosity module...
Figuring out a representation that allows all the flexibility we discussed...
"puzzles": [
{
"puzzle_description": "Appetitive puzzle",
"tt": [[0,0,2], # state 0: first presentation
[0,0,0], # state 1: getting passed over (placeholder)
[2,2,2]], # state 2: consumed (saturating)
"features": [[2], # state 0: Green
[2], # state 1: Green (placeholder)
[0]], # state 2: Empty/Unknown (after being eaten)
"rewards": [
[-1, # state 0: first look
-1, # state 1: proceeding to next puzzle (placeholder)
1, # state 2: consume (reward)
0.5 # Proportion
],
[-1, # state 0: first look
-1, # state 1: proceeding to next puzzle (placeholder)
-1, # state 2: consume (punishment)
0.5 # Proportion
],
]
},
{
"puzzle_description": "Aversive puzzle",
"tt": [[0,0,2], # state 0: first presentation
[0,0,0], # state 1: getting passed over (placeholder)
[2,2,2]], # state 2: consumed (saturating)],
"features": [[1], # state 0: Red
[1], # state 1: Red (placeholder)
[0]], # state 2: Empty/Unknown (after being eaten)
"rewards": [-1, # state 0: first look
-1, # state 1: proceeding to next puzzle (placeholder)
-2], # state 2: consume (punishment)
},
"""
def exp1_environment(*args, **kwargs):
unambiguous_puzzle_spec = {
"puzzle_set_description": "Unambiguous puzzle set with 1 good, 1 bad puzzle",
"puzzles": [
{
"puzzle_description": "Appetitive puzzle",
"tt": [[0,0,2], # state 0: first presentation
[0,0,0], # state 1: getting passed over (placeholder)
[2,2,2]], # state 2: consumed (saturating)
"features": [[2], # state 0: Green
[2], # state 1: Green (placeholder)
[0]], # state 2: Empty/Unknown (after being eaten)
"rewards": [-1, # state 0: first look
-1, # state 1: proceeding to next puzzle (placeholder)
1], # state 2: consume (reward)
},
{
"puzzle_description": "Aversive puzzle",
"tt": [[0,0,2], # state 0: first presentation
[0,0,0], # state 1: getting passed over (placeholder)
[2,2,2]], # state 2: consumed (saturating)],
"features": [[1], # state 0: Red
[1], # state 1: Red (placeholder)
[0]], # state 2: Empty/Unknown (after being eaten)
"rewards": [-1, # state 0: first look
-1, # state 1: proceeding to next puzzle (placeholder)
-2], # state 2: consume (punishment)
},
]
}
ambiguous_puzzle_spec = {
"puzzle_set_description": "Ambiguous puzzle set with 1 good, 1 bad puzzle.",
"puzzles": [
{
"puzzle_description": "Appetitive puzzle",
"tt": [[0,0,2], # state 0: first presentation
[0,0,0], # state 1: getting passed over (placeholder)
[2,2,2]], # state 2: consumed (saturating)
"features": [[1], # state 0: Red
[1], # state 1: Red (placeholder)
[0]], # state 2: Empty/Unknown (after being eaten)
"rewards": [-1, # state 0: first look
-1, # state 1: proceeding to next puzzle (placeholder)
1], # state 2: consume (reward)
},
{
"puzzle_description": "Aversive puzzle",
"tt": [[0,0,2], # state 0: first presentation
[0,0,0], # state 1: getting passed over (placeholder)
[2,2,2]], # state 2: consumed (saturating)],
"features": [[1], # state 0: Red
[1], # state 1: Red (placeholder)
[0]], # state 2: Empty/Unknown (after being eaten)
"rewards": [-1, # state 0: first look
-1, # state 1: proceeding to next puzzle (placeholder)
-2], # state 2: consume (punishment)
},
]
}
# Notion: Have an object to define a schedule of presentation of
# environments, with the ability to stochastically present one of
# a list of environments.
exp_schedule = {
"setlist": [
{
"desc": "Initial puzzle set",
"specs": [unambiguous_puzzle_spec],
"turns": 50,
"num_stimuli": 6,
"sequence_type": "fixed",
"probs": [[1.0], [1.0]]
},
{
"desc": "Stochastic puzzle sets",
"specs": [unambiguous_puzzle_spec, ambiguous_puzzle_spec],
"turns": 200,
"num_stimuli": 6,
"sequence_type": "stochastic",
"probs": [[1.0, 0.0], [0.0, 1.0]]
},
]
}
    num_puzzles_on_belt = kwargs.get('num_puzzles_on_belt', 6)
    spec = unambiguous_puzzle_spec  # renamed from 'pz' to avoid shadowing the module import
    if True:
maxrewards = [1]
# Produce Gellermann sequence
upress = RESS()
print(dir(upress))
        print(spec['puzzles'])
        print(len(spec['puzzles']))
        upseries = upress.newress(num_puzzles_on_belt, len(spec['puzzles']))
print("upseries", upseries)
# Create puzzle sequence
# Instantiate puzzles per Gellermann sequence
puzzles = []
for stimi in upseries:
stimn = int(stimi)
            myp = Puzzle(tt=np.array(spec['puzzles'][stimn]['tt']),
                         features=spec['puzzles'][stimn]['features'],
                         rewards=spec['puzzles'][stimn]['rewards']
)
puzzles.append(myp)
# Create conveyor belt
world = ConvBelt(actionSpace = getActionSpace(puzzles),
observationSpace = getObservationSpace(puzzles),
maxRewards = maxrewards,
agentclass=Agent,
randomize = False, alpha=0.005)
# Add puzzles
for pi in puzzles:
world.append(pi)
return world
def do_experiment():
# Experiment instance
myexp = Experiment()
myexp.set_agentclass(Agent)
myexp.set_environclass(ConvBelt)
myexp.set_evolverclass(EvolveWeights)
myexp.set_evolver_attributes() # defaults
myexp.set_environ_maker(exp1_environment) # sets function
myexp.make_environ() # Calls function
myexp.make_evolver_instance()
if myexp.validate():
myexp.evolver.driver()
else:
print("Experiment failed to validate.")
if __name__ == "__main__":
print("exp1.py start...")
do_experiment()
print("exp1.py done.")

185
code/experiment.py Executable file

@@ -0,0 +1,185 @@
"""
experiment.py
Curiosity project Experiment class definition.
Aim for better encapsulation.
Experiment class
- This class should get the various classes to use in running an experiment
- EvolveWeights
- mda?
- Environ (GridWorld, ConvBelt, Puzzle)
- Still is going to require ad hoc function to create the particular Environ
- But could pass in function to use
- Agentclass
- And experimental attributes
- For example
- Experiment constructs EW instance, passes in weight length
- Experiment constructs Environ instance
- Experiment requests evolution run of EW with parameters
- EW calls Experiment for each evaluation of an individual (and in what generation)
- Experiment calls Environ.evaluate with individual weights, agentclass
- Passes w, tuple back to EW
"""
import sys
import os
import traceback
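# Minimal wiring sketch (illustrative; mirrors exp1.py's do_experiment —
# Agent, ConvBelt, EvolveWeights, and make_world are supplied by the caller):
#   exp = Experiment()
#   exp.set_agentclass(Agent)
#   exp.set_environclass(ConvBelt)
#   exp.set_evolverclass(EvolveWeights)
#   exp.set_evolver_attributes()      # defaults
#   exp.set_environ_maker(make_world) # any callable returning an environ instance
#   exp.make_environ()
#   exp.make_evolver_instance()
#   if exp.validate():
#       exp.evolver.driver()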
class Holder(object):
def __init__(self):
pass
class Experiment(object):
"""
Experiment class. Instances will drive reinforcement learning experiments.
"""
def __init__(self):
self.agentclass = None
self.environclass = None
self.evolverclass = None
self.environmaker = None
pass
def validate(self):
valid = True
# Test that we have classes to use
        valid = valid and (self.agentclass is not None)
        valid = valid and (self.environclass is not None)
        valid = valid and (self.evolverclass is not None)
# Test other values here
return valid
def set_schedule(self, schedule):
self.schedule = schedule
def set_environ_maker(self, environmaker):
self.environmaker = environmaker
def make_environ(self):
        if self.environmaker is not None:
            try:
                self.environ = self.environmaker()
            except Exception:
                estr = f"Error: {traceback.format_exc()}"
                print(estr)
                self.environ = None
def set_agentclass(self, agentclass):
# Test class for compatibility
okclass = True
# No test yet
if okclass:
self.agentclass = agentclass
def get_agentclass(self):
return self.agentclass
def set_environclass(self, environclass):
# Test class for compatibility
okclass = True
if not 'evaluate' in dir(environclass):
okclass = False
print("set_environclass error: class does not provide 'evaluate'")
if okclass:
self.environclass = environclass
def get_environclass(self):
return self.environclass
def set_evolverclass(self, evolverclass):
# Test class for compatibility
okclass = True
if not 'driver' in dir(evolverclass):
okclass = False
print("set_evolverclass error: class does not provide 'driver'")
if okclass:
self.evolverclass = evolverclass
def set_agent_attributes(self, alpha=0.005):
self.agent_props = Holder()
        self.agent_props.alpha = alpha
def set_evolver_attributes(self,
popsize=100,
maxgenerations=10000,
cxpb=0.5,
mtpb=0.05,
wmin=-20.0,
wmax=20.0,
mut_center=0.0,
mut_sigma=0.1,
mut_indpb=0.05,
tournsize=5,
tournk=2,
normalize_fitness=True,
tag='environ'
):
self.evolver_props = Holder()
self.evolver_props.popsize = popsize
self.evolver_props.maxgenerations = maxgenerations
self.evolver_props.cxpb = cxpb
self.evolver_props.mtpb = mtpb
self.evolver_props.wmin = wmin
self.evolver_props.wmax = wmax
self.evolver_props.mut_center = mut_center
self.evolver_props.mut_sigma = mut_sigma
self.evolver_props.mut_indpb = mut_indpb
self.evolver_props.tournsize = tournsize
self.evolver_props.tournk = tournk
self.evolver_props.normalize_fitness = normalize_fitness
self.evolver_props.tag = tag
def make_evolver_instance(self):
self.evolver = self.evolverclass(
self.environclass,
popsize=self.evolver_props.popsize,
maxgenerations=self.evolver_props.maxgenerations,
cxpb=self.evolver_props.cxpb,
mtpb=self.evolver_props.mtpb,
wmin=self.evolver_props.wmin,
wmax=self.evolver_props.wmax,
mut_center= self.evolver_props.mut_center,
mut_sigma= self.evolver_props.mut_sigma,
mut_indpb= self.evolver_props.mut_indpb,
tournsize= self.evolver_props.tournsize,
tournk= self.evolver_props.tournk,
normalize_fitness= self.evolver_props.normalize_fitness,
tag= self.evolver_props.tag
)
def set_env_attributes(self):
self.env_props = Holder()
def handle_evaluation(self, ind, generation):
"""
evolver calls this to get an evaluation of an
individual.
Depending on the experiment schedule and generation,
this may require constructing a new environment.
"""
pass
def run_experiment(self):
"""
# Run experiment
ew = EvolveWeights(world,
popsize=100,
maxgenerations=1000,
tournsize=75,
tournk=3,
normalize_fitness=False)
ew.driver()
"""

438
code/gwe.py Normal file

@@ -0,0 +1,438 @@
"""
gwe.py -- GridWorld Evolving
Bringing together an Agent acting in GridWorld with
DEAP evolutionary computation.
Notion: Set up for being able to call an Agent with
a provided set of weights and run their training in
a Gridworld environment. DEAP keeps a population of
weights and handles the evolutionary computation.
Save the best instantiated Agent per each generation
for later review and analysis.
"""
import sys
# allow importing from the 'code/' dir
sys.path.append("../code")
import os
import platform
import pickle
import json
import traceback
import datetime
import numpy as np, itertools, copy
import matplotlib.pyplot as plt
from collections import defaultdict
import importlib # module reloading
import environments
import agents
# always forces a reload in case you have edited environments or agents
importlib.reload(environments)
importlib.reload(agents)
from environments.gridworld import GridWorld
from agents.q_agent import EvolvableAgent as Agent
# DEAP imports
import random
from deap import creator, base, tools, algorithms
import multiprocessing
#pool = multiprocessing.Pool()
#toolbox.register("map", pool.map)
# Weight handling
from mda import MultiDimArray
def isotime():
return datetime.datetime.now().isoformat()
def t2fn(timestamp):
timestamp = timestamp.replace('.','_')
timestamp = timestamp.replace(':','_')
return timestamp
class Holder(object):
def __init__(self):
pass
class GoalsAndHolesWorld(object):
"""
Class for making and using a 2D GridWorld based on
setting goals and holes (hazards) for an RL Agent
to explore.
"""
def __init__(self, obsSpace, actSpace, goals, holes, startstate, agentclass,
killed_reward=-10.0, max_training_trials=50, max_steps=32,
alpha=0.01, gamma=0.95, epsilon=0.01, lmbda=0.42
):
self.obsSpace = tuple(obsSpace)
self.actSpace = tuple(actSpace)
self.goals = list(goals)
self.holes = tuple(holes)
self.startState = tuple(startstate)
self.agentclass = agentclass
self.killed_reward = killed_reward
self.max_training_trials = max_training_trials
self.max_steps = max_steps
self.alpha = alpha
self.gamma = gamma
self.epsilon = epsilon
self.lmbda = lmbda
self.env = self.make_env(self.startState, self.obsSpace, self.goals, self.holes)
print("Goals from env", self.env.goals)
pass
def get_weights_len(self):
mywl = np.prod(tuple(self.obsSpace) + tuple(self.actSpace))
return mywl
def make_env(self, startstate=None, dims=None, goals=None, holes=None):
if startstate in [None]:
startstate = self.startState
if dims in [None]:
dims = self.obsSpace
if goals in [None]:
goals = list(self.goals)
if holes in [None]:
holes = self.holes
print(startstate, dims, goals, holes)
myenv = GridWorld(dims = dims, startState = startstate)
myenv.goals.append(goals)
for ii in range(holes[0][0], holes[0][1]+1):
for jj in range(holes[1][0], holes[1][1]+1):
print("adding hole at ", ii, jj)
myenv.holes.append([ii,jj])
return myenv
def run_trial(self, agent, env=None):
if env in [None]:
env = self.env
agent.reset() # soft-reset() (keeps learned weights)
nextState = env.reset()
lastState = nextState
runtime = 0
while True:
runtime += 1
status = 'alive'
# set agent senses based on environment and allow agent to determine an action
agent.sensoryState = nextState
agent.plasticUpdate()
# determine effect on environment state & any reward (in standard openAI-gym API format)
nextState, reward, goal_achieved, _ = env.step(agent.action)
#if (tuple(lastState) == tuple(self.env.goals)) or (tuple(nextState) == tuple(self.env.goals)):
# print(agent.action, lastState, reward, goal_achieved, nextState)
lastState = nextState
agent.reward = reward
if goal_achieved or (runtime >= self.max_steps): break
            # stop trial if agent explicitly failed early
elif reward <= self.killed_reward:
agent.sensoryState = nextState
agent.reward = reward
agent.plasticUpdate() # allow 1 more update to 'learn' the bad reward
agent.reset()
nextState = env.reset()
status = 'killed'
runtime = self.max_steps
break
# print(time, agent.action, agent.reward, status)
#print(" runtime", runtime)
#if goal_achieved:
# print(" Goal Achieved!!!")
return agent, runtime
def evaluate(self, ind, return_agent=False):
"""
"""
latest = 20
# Pull weights from ind
# Instantiate an Agent
myagent = Agent(obsSpace=self.obsSpace, actSpace=self.actSpace, alpha=self.alpha, gamma=self.gamma, epsilon=self.epsilon, lmbda=self.lmbda)
# Put weights in the Agent
myagent.weights = [x for x in ind]
#print(" myagent.weights", myagent.weights)
# run_trial calls
time_to_solve_each_trial = [] # lower is better
for trialN in range(self.max_training_trials):
# some output to see it running
# if (trialN % 10) == 0: print('.',end='')
myagent, runtime = self.run_trial(myagent)
# record trial results
time_to_solve_each_trial.append(runtime)
#print(" tts", time_to_solve_each_trial)
# calculate fitness
# Fitness is 1 - (avg. tts / max. time)
# w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial) / self.max_steps))
ltts = len(time_to_solve_each_trial)
latest = ltts // 2
# Latter half of steps
#w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial[-latest:]) / self.max_steps))
# First half of steps
w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial[:-latest]) / self.max_steps))
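        # e.g. with max_steps=200 and a mean time-to-solve of 50 over the
        # scored trials, w = 1 - 50/200 = 0.75 (illustrative numbers)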
# return the fitness
#print(" fitness", "%3.2f" % w)
#print(" myagent.weights after", myagent.weights)
if return_agent:
return myagent, w, time_to_solve_each_trial
else:
return w,
class MaxAve(object):
def __init__(self, alpha=0.1):
self.alpha = alpha
pass
def get_weights_len(self, wl=100):
return wl
def evaluate(self, ind):
npwts = np.array([x for x in ind])
wtmax = np.max(np.abs(npwts))
wtmean = np.mean(np.abs(npwts))
if 0.0 != wtmax:
w = wtmean / wtmax
else:
w = 0.0
return w,
class EvolveWeights(object):
"""
Class to apply DEAP to evolve a population consisting of a set
of weights.
"""
def __init__(self, gahw,
popsize=100, maxgenerations=10000,
cxpb=0.5, mtpb=0.05,
wmin=-20.0, wmax=20.0,
mut_center=0.0, mut_sigma=0.1, mut_indpb=0.05,
tournsize=5,
tournk=2,
normalize_fitness=True,
tag='gahw'
):
self.tag = tag
self.starttime = isotime()
self.logbase = tag + "_" + t2fn(self.starttime)
self.gahw = gahw
self.weights_len = gahw.get_weights_len()
self.popsize = popsize
self.maxgenerations = maxgenerations
self.cxpb = cxpb
self.mtpb = mtpb
self.wmin = wmin
self.wmax = wmax
self.mut_center = mut_center
self.mut_sigma = mut_sigma
self.mut_indpb = mut_indpb
self.tournsize = tournsize
self.tournk = tournk
self.normalize_fitness = normalize_fitness
pass
def masv(self, pop):
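        """Mean absolute weight per individual, scaled by the population-wide
        maximum absolute weight; a rough per-individual magnitude summary."""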
mav = []
maxs = []
for ind in pop:
wts = [x for x in ind]
mav.append(np.mean(np.abs(wts)))
maxs.append(np.max(np.abs(wts)))
allmax = np.max(maxs)
mymasv = [x/allmax for x in mav]
return mymasv
def cxTwoPointCopy(self, ind1, ind2):
"""Execute a two points crossover with copy on the input individuals. The
copy is required because the slicing in numpy returns a view of the data,
which leads to a self overwriting in the swap operation. It prevents
::
>>> import numpy as np
>>> a = np.array((1,2,3,4))
>>> b = np.array((5,6,7,8))
>>> a[1:3], b[1:3] = b[1:3], a[1:3]
>>> print(a)
[1 6 7 4]
>>> print(b)
[5 6 7 8]
"""
size = len(ind1)
cxpoint1 = random.randint(1, size)
cxpoint2 = random.randint(1, size - 1)
if cxpoint2 >= cxpoint1:
cxpoint2 += 1
else: # Swap the two cx points
cxpoint1, cxpoint2 = cxpoint2, cxpoint1
ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = ind2[cxpoint1:cxpoint2].copy(), ind1[cxpoint1:cxpoint2].copy()
return ind1, ind2
def zero(self):
return 0.0
def smallrandom(self, eps=None):
"""
Produce a small random number in [-eps .. eps].
A random variate in [-1 .. 1] is produced then
multiplied by eps, so the final range is in [-eps .. eps].
"""
if eps in [None]:
eps = self.gahw.alpha
rv = ((2.0 * random.random()) - 1.0) * eps
return rv
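        # e.g. with eps = alpha = 0.01, initial weights are uniform in [-0.01, 0.01]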
def setup(self):
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", np.ndarray, fitness=creator.FitnessMax)
self.toolbox = base.Toolbox()
self.pool = multiprocessing.Pool()
self.toolbox.register("map", self.pool.map)
#toolbox.register("attr_bool", random.randint, 0, 1) # non-numpy non-float version
# self.toolbox.register("attr_float", random.random)
#self.toolbox.register("attr_float", self.zero)
self.toolbox.register("attr_float", self.smallrandom)
self.toolbox.register("individual", tools.initRepeat, creator.Individual, self.toolbox.attr_float, n=self.weights_len)
self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
self.toolbox.register("evaluate", self.gahw.evaluate)
#toolbox.register("mate", tools.cxTwoPoint) # non-numpy non-float version
self.toolbox.register("mate", self.cxTwoPointCopy)
#toolbox.register("mutate", tools.mutFlipBit, indpb=0.05) # non-numpy non-float version
self.toolbox.register("mutate", tools.mutGaussian, mu=self.mut_center, sigma=self.mut_sigma, indpb=self.mut_indpb)
self.toolbox.register("select", tools.selTournament, tournsize=self.tournsize, k=self.tournk)
def normalize_fitnesses(self, fitnesses):
#print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
maxfitness = np.max([x[0] for x in fitnesses])
#print("maxfitness", maxfitness)
listfit = [x[0] for x in fitnesses]
#print("listfit", listfit)
normfit = [x/maxfitness for x in listfit]
#print("normfit", normfit)
fitnesses = [tuple([x]) for x in normfit]
#print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses])
return fitnesses
def log_it(self, generation):
pool = self.pool
toolbox = self.toolbox
self.pool = None
self.toolbox = None
        pklfn = f"{self.logbase}__{generation+1}-{self.maxgenerations}.pkl"
        with open(pklfn, "wb") as fh:
            pickle.dump(self, fh)
        self.pool = pool
        self.toolbox = toolbox
def loop(self):
self.population = self.toolbox.population(n=self.popsize)
#print(self.masv(self.population))
NGEN=self.maxgenerations
for gen in range(NGEN):
print("generation", gen)
offspring = algorithms.varAnd(self.population, self.toolbox, cxpb=self.cxpb, mutpb=self.mtpb)
# print("offspring", offspring)
            # constrain genome values to [wmin, wmax] (assign back in place;
            # np.clip alone returns a new array and has no effect here)
            for offspring_i, individual in enumerate(offspring):
                offspring[offspring_i][:] = np.clip(individual, self.wmin, self.wmax)
# print("clipped offspring", offspring)
# Evaluate the individuals with an invalid fitness (not yet evaluated)
# print("check fitness.valid")
invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
# print("invalid_ind", len(invalid_ind))
#print("setting fitness")
fitnesses = self.toolbox.map(self.toolbox.evaluate, invalid_ind)
if self.normalize_fitness:
fitnesses = self.normalize_fitnesses(fitnesses)
"""
#print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
maxfitness = np.max([x[0] for x in fitnesses])
#print("maxfitness", maxfitness)
listfit = [x[0] for x in fitnesses]
#print("listfit", listfit)
normfit = [x/maxfitness for x in listfit]
#print("normfit", normfit)
fitnesses = [tuple([x]) for x in normfit]
#print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses])
"""
# print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
self.fitness_dist(fitnesses)
# print("update ind fitness")
for ind, fit in zip(invalid_ind, fitnesses):
ind.fitness.values = fit
#print("selection")
#print("offspring\n", self.masv(offspring))
self.offspring = offspring
self.population = self.toolbox.select(offspring, k=len(self.population))
if 0 == gen % 100:
self.log_it(gen)
#print("population after selection\n", self.masv(self.population))
#print("Report for generation", gen)
self.report()
def report(self):
# post-evolution analysis
fitnesses = self.toolbox.map(self.toolbox.evaluate, self.population)
if self.normalize_fitness:
fitnesses = self.normalize_fitnesses(fitnesses)
self.fitnesses = fitnesses
self.sortedFitnesses = sorted(fitnesses)
self.sortedFitnesses.reverse()
self.fitness_dist(fitnesses)
self.bestFitness, self.worstFitness = self.sortedFitnesses[0], self.sortedFitnesses[-1]
print("best/worst w", self.bestFitness, self.worstFitness)
self.bestGenome = tools.selBest(self.population, k=1)
# print(self.bestGenome)
def ffmt(self, value, fmt="%3.2f"):
return fmt % value
def fitness_dist(self, fitnesses):
listfit = [x[0] for x in fitnesses]
        pct05, pct25, pct50, pct75, pct95 = np.percentile(listfit, [5, 25, 50, 75, 95])
print(f"fitness dist: {self.ffmt(np.min(listfit))} {self.ffmt(pct05)} {self.ffmt(pct25)} {self.ffmt(pct50)} {self.ffmt(pct75)} {self.ffmt(pct95)} {self.ffmt(np.max(listfit))}")
def driver(self):
# Initialize
self.setup()
# Generation loop
self.loop()
# Report
self.report()
self.log_it(self.maxgenerations)
print(self.masv(self.population))
pass
def holes_block_direct_route():
# GridWorld as in 'gridworld.ipynb'
gahw = GoalsAndHolesWorld((4,12), (4,), (3,11), [[3,3],[1,10]], (3,0), Agent, max_steps=200)
ew = EvolveWeights(gahw, popsize=100, maxgenerations=10000, tournsize=100, tournk=2, normalize_fitness=False)
ew.driver()
def maxave():
ma = MaxAve()
ew = EvolveWeights(ma, popsize = 100, maxgenerations=100)
ew.driver()
if __name__ == "__main__":
holes_block_direct_route()
# maxave()
pass

85
code/mda.py Normal file

@@ -0,0 +1,85 @@
import numpy as np
from typing import Any, Union, List, Tuple
class MultiDimArray:
"""
A class to represent and manipulate multi-dimensional arrays.
Attributes
----------
mdary : numpy.ndarray
A multi-dimensional array containing the input data.
shape : tuple
The shape of the input multi-dimensional array.
Methods
-------
flatten(output_type="list") -> Union[List, Tuple, np.ndarray]:
Returns the flattened version of the multi-dimensional array as a list, tuple, or Numpy array.
foldout(vector, output_type="list") -> Union[List, Tuple, np.ndarray]:
Reshapes a 1D vector back into the original shape of the multi-dimensional array,
and returns it as a list, tuple, or Numpy array.
"""
def __init__(self, mdary: Union[List, Tuple, np.ndarray]):
self.mdary = np.array(mdary)
self.shape = self.mdary.shape
def flatten(self, output_type: str = "list") -> Union[List, Tuple, np.ndarray]:
"""
Flatten the multi-dimensional array.
Parameters
----------
output_type : str, optional
The output type of the flattened array, either 'list', 'tuple', or 'numpy' (default is 'list').
Returns
-------
Union[List, Tuple, np.ndarray]
The flattened version of the multi-dimensional array in the specified output
"""
flat_array = self.mdary.flatten()
if output_type == "list":
return flat_array.tolist()
elif output_type == "tuple":
return tuple(flat_array)
elif output_type == "numpy":
return flat_array
else:
raise ValueError("Invalid output_type. Choose 'list', 'tuple', or 'numpy'")
def foldout(self, vector: Union[List, Tuple, np.ndarray], output_type: str = "list") -> Union[List, Tuple, np.ndarray]:
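        """
        Reshape a 1D vector back into the original multi-dimensional shape.
        Parameters
        ----------
        vector : Union[List, Tuple, np.ndarray]
            Flat data with the same length as the flattened array.
        output_type : str, optional
            The output type, either 'list', 'tuple', or 'numpy' (default is 'list').
        Returns
        -------
        Union[List, Tuple, np.ndarray]
            The reshaped array in the specified output type.
        """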
if len(vector) != self.mdary.size:
raise ValueError("The input vector must have the same length as the flattened form of the multi-dimensional array")
reshaped_array = np.reshape(vector, self.shape)
if output_type == "list":
return reshaped_array.tolist()
elif output_type == "tuple":
return tuple(map(tuple, reshaped_array))
elif output_type == "numpy":
return reshaped_array
else:
raise ValueError("Invalid output_type. Choose 'list', 'tuple', or 'numpy'")
if __name__ == "__main__":
"""
Example usage:
"""
mda = MultiDimArray([[1, 2], [3, 4], [5,6]])
#mda = MultiDimArray([1, 2, 3, 4, 5,6])
print(f"Input array: {str(mda.mdary.tolist())}")
flat = mda.flatten(output_type="list")
print(f"Flattened array: {flat}")
    # The flat array here is [1, 2, 3, 4, 5, 6]
folded = mda.foldout(flat, output_type="list")
print(f"Folded back array: {folded}")
"""
The folded back array should be numerically identical to the original mdary:
    [[1, 2], [3, 4], [5, 6]]
"""

568
code/multigwe.py Normal file

@@ -0,0 +1,568 @@
"""multigwe.py -- Multi GridWorlds Evolving
Bringing together an Agent acting in one of multiple GridWorlds with
DEAP evolutionary computation.
Notion: Set up for being able to call an Agent with a provided set of
weights and run their training in one of multiple Gridworld
environments. DEAP keeps a population of weights and handles the
evolutionary computation. Save the best instantiated Agent per each
generation for later review and analysis.
"""
import sys
# allow importing from the 'code/' dir
sys.path.append("../code")
import os
import platform
import pickle
import json
import traceback
import datetime
import numpy as np, itertools, copy
import matplotlib.pyplot as plt
from collections import defaultdict
import importlib # module reloading
import environments
import agents
# always forces a reload in case you have edited environments or agents
importlib.reload(environments)
importlib.reload(agents)
from environments.gridworld import GridWorld
from agents.q_agent import EvolvableAgent as Agent
# DEAP imports
import random
from deap import creator, base, tools, algorithms
import multiprocessing
#pool = multiprocessing.Pool()
#toolbox.register("map", pool.map)
# Weight handling
from mda import MultiDimArray
def isotime():
return datetime.datetime.now().isoformat()
def t2fn(timestamp):
timestamp = timestamp.replace('.','_')
timestamp = timestamp.replace(':','_')
return timestamp
class Holder(object):
def __init__(self):
pass
class GoalsAndHolesWorld(object):
"""
Class for making and using a 2D GridWorld based on
setting goals and holes (hazards) for an RL Agent
to explore.
Modifications for multiple maps...
Need a 'maps' array
"""
def __init__(self, obsSpace, actSpace, goals, holes, startstate, agentclass,
killed_reward=-10.0, max_training_trials=50, max_steps=32,
alpha=0.005, gamma=0.95, epsilon=0.01, lmbda=0.42
):
self.maps = []
self.add_map(obsSpace, actSpace, goals, holes, startstate)
# Instance now has the initial map in place
self.agentclass = agentclass
self.killed_reward = killed_reward
self.max_training_trials = max_training_trials
self.max_steps = max_steps
self.alpha = alpha
self.gamma = gamma
self.epsilon = epsilon
self.lmbda = lmbda
print("Goals from initial env", self.maps[0].env.goals)
pass
def get_weights_len(self):
mywl = np.prod(tuple(self.maps[0].obsSpace) + tuple(self.maps[0].actSpace))
return mywl
def add_map(self, obsSpace, actSpace, goals, holes, startstate):
mymap = Holder()
mymap.obsSpace = tuple(obsSpace)
mymap.actSpace = tuple(actSpace)
mymap.goals = list(goals)
mymap.holes = tuple(holes)
mymap.startState = tuple(startstate)
mymap.env = self.make_env(mymap.startState, mymap.obsSpace, mymap.goals, mymap.holes)
self.maps.append(mymap)
def make_env(self, startstate=None, dims=None, goals=None, holes=None):
# Default: the first map in the list.
if startstate in [None] and 0 < len(self.maps):
startstate = self.maps[0].startState
if dims in [None] and 0 < len(self.maps):
dims = self.maps[0].obsSpace
if goals in [None] and 0 < len(self.maps):
goals = list(self.maps[0].goals)
if holes in [None] and 0 < len(self.maps):
holes = self.maps[0].holes
print(startstate, dims, goals, holes)
myenv = GridWorld(dims = dims, startState = startstate)
myenv.goals.append(goals)
for ii in range(holes[0][0], holes[0][1]+1):
for jj in range(holes[1][0], holes[1][1]+1):
print("adding hole at ", ii, jj)
myenv.holes.append([ii,jj])
return myenv
def run_trial(self, agent, env=None):
if env in [None]:
# Choose an environment
"""
if 1 == len(self.maps):
mymap = self.maps[0]
else:
mymap = random.choice(self.maps)
"""
mymap = self.choose_map()
env = mymap.env
agent.reset() # soft-reset() (keeps learned weights)
nextState = env.reset()
lastState = nextState
runtime = 0
while True:
runtime += 1
status = 'alive'
# set agent senses based on environment and allow agent to determine an action
agent.sensoryState = nextState
agent.plasticUpdate()
# determine effect on environment state & any reward (in standard openAI-gym API format)
nextState, reward, goal_achieved, _ = env.step(agent.action)
#if (tuple(lastState) == tuple(self.env.goals)) or (tuple(nextState) == tuple(self.env.goals)):
# print(agent.action, lastState, reward, goal_achieved, nextState)
lastState = nextState
agent.reward = reward
if goal_achieved or (runtime >= self.max_steps): break
            # stop trial if agent explicitly failed early
elif reward <= self.killed_reward:
agent.sensoryState = nextState
agent.reward = reward
agent.plasticUpdate() # allow 1 more update to 'learn' the bad reward
agent.reset()
nextState = env.reset()
status = 'killed'
runtime = self.max_steps
break
# print(time, agent.action, agent.reward, status)
#print(" runtime", runtime)
#if goal_achieved:
# print(" Goal Achieved!!!")
return agent, runtime
def choose_map(self, map_index=None):
"""
        If map_index is a valid index in [0, len(self.maps)), return that map.
        Otherwise return a randomly chosen map.
"""
# print("self.maps", self.maps)
if map_index in [None]:
# Random choice of map from alternatives
if 1 == len(self.maps): # There can only be one
mymap = self.maps[0]
else: # Choose one of them
mymap = random.choice(self.maps)
elif 0 <= map_index and map_index < len(self.maps):
mymap = self.maps[map_index]
else:
mymap = random.choice(self.maps)
return mymap
def evaluate(self, ind, return_agent=False):
"""
"""
latest = 20
# Pull weights from ind
# Choose an environment
"""
if 1 == len(self.maps):
mymap = self.maps[0]
else:
mymap = random.choice(self.maps)
"""
# New way
mymap = self.choose_map()
myenv = mymap.env
# Instantiate an Agent
myagent = Agent(obsSpace=mymap.obsSpace, actSpace=mymap.actSpace, alpha=self.alpha, gamma=self.gamma, epsilon=self.epsilon, lmbda=self.lmbda)
# Should consider one round of single trial to get the performance due to
# inheritance, then proceed with full trials to 'develop' the agent,
# and get its trained performance.
# Put weights in the Agent
myagent.weights = [x for x in ind]
#print(" myagent.weights", myagent.weights)
# run_trial calls
time_to_solve_each_trial = [] # lower is better
for trialN in range(self.max_training_trials):
# some output to see it running
# if (trialN % 10) == 0: print('.',end='')
myagent, runtime = self.run_trial(myagent, env=myenv)
# record trial results
time_to_solve_each_trial.append(runtime)
#print(" tts", time_to_solve_each_trial)
# calculate fitness
# Fitness is 1 - (avg. tts / max. time)
# w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial) / self.max_steps))
ltts = len(time_to_solve_each_trial)
latest = ltts // 2
# Latter half of steps
#w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial[-latest:]) / self.max_steps))
# First half of steps
w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial[:-latest]) / self.max_steps))
# return the fitness
#print(" fitness", "%3.2f" % w)
#print(" myagent.weights after", myagent.weights)
if return_agent:
return myagent, w, time_to_solve_each_trial
else:
return w,
def multi_evaluate(self, ind, return_agent=False):
"""
Like 'evaluate', but when multiple maps exist, evaluate per
each map, collect performance, and return fitness as the
mean performance across all maps.
"""
latest = 20
# Pull weights from ind
# Info across all maps/environments
time_to_solve_each_trial = [] # lower is better
for mymap in self.maps:
myenv = mymap.env
# Instantiate an Agent
myagent = Agent(obsSpace=mymap.obsSpace, actSpace=mymap.actSpace, alpha=self.alpha, gamma=self.gamma, epsilon=self.epsilon, lmbda=self.lmbda)
# Put weights in the Agent
myagent.weights = [x for x in ind]
#print(" myagent.weights", myagent.weights)
# run_trial calls
for trialN in range(self.max_training_trials):
# some output to see it running
# if (trialN % 10) == 0: print('.',end='')
myagent, runtime = self.run_trial(myagent, env=myenv)
# record trial results
time_to_solve_each_trial.append(runtime)
# calculate fitness
# Fitness is 1 - (avg. tts / max. time)
w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial) / self.max_steps))
# return the fitness
if return_agent:
return myagent, w, time_to_solve_each_trial
else:
return w,
class MaxAve(object):
def __init__(self, alpha=0.1):
self.alpha = alpha
pass
def get_weights_len(self, wl=100):
return wl
def evaluate(self, ind):
npwts = np.array([x for x in ind])
wtmax = np.max(np.abs(npwts))
wtmean = np.mean(np.abs(npwts))
if 0.0 != wtmax:
w = wtmean / wtmax
else:
w = 0.0
return w,
class EvolveWeights(object):
"""
Class to apply DEAP to evolve a population consisting of a set
of weights.
"""
def __init__(self, gahw,
popsize=100, maxgenerations=10000,
cxpb=0.5, mtpb=0.05,
wmin=-20.0, wmax=20.0,
mut_center=0.0, mut_sigma=0.1, mut_indpb=0.05,
tournsize=5,
tournk=2,
normalize_fitness=True,
tag='gahw'
):
self.tag = tag
self.starttime = isotime()
self.logbase = tag + "_" + t2fn(self.starttime)
self.gahw = gahw
self.weights_len = gahw.get_weights_len()
self.popsize = popsize
self.maxgenerations = maxgenerations
self.cxpb = cxpb
self.mtpb = mtpb
self.wmin = wmin
self.wmax = wmax
self.mut_center = mut_center
self.mut_sigma = mut_sigma
self.mut_indpb = mut_indpb
self.tournsize = tournsize
self.tournk = tournk
self.normalize_fitness = normalize_fitness
pass
def masv(self, pop):
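        """Mean absolute weight per individual, scaled by the population-wide
        maximum absolute weight; a rough per-individual magnitude summary."""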
mav = []
maxs = []
for ind in pop:
wts = [x for x in ind]
mav.append(np.mean(np.abs(wts)))
maxs.append(np.max(np.abs(wts)))
allmax = np.max(maxs)
mymasv = [x/allmax for x in mav]
return mymasv
def cxTwoPointCopy(self, ind1, ind2):
"""Execute a two points crossover with copy on the input individuals. The
copy is required because the slicing in numpy returns a view of the data,
which leads to a self overwriting in the swap operation. It prevents
::
>>> import numpy as np
>>> a = np.array((1,2,3,4))
>>> b = np.array((5,6,7,8))
>>> a[1:3], b[1:3] = b[1:3], a[1:3]
>>> print(a)
[1 6 7 4]
>>> print(b)
[5 6 7 8]
"""
size = len(ind1)
cxpoint1 = random.randint(1, size)
cxpoint2 = random.randint(1, size - 1)
if cxpoint2 >= cxpoint1:
cxpoint2 += 1
else: # Swap the two cx points
cxpoint1, cxpoint2 = cxpoint2, cxpoint1
ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = ind2[cxpoint1:cxpoint2].copy(), ind1[cxpoint1:cxpoint2].copy()
return ind1, ind2
def zero(self):
return 0.0
def smallrandom(self, eps=None):
"""
Produce a small random number in [-eps .. eps].
A random variate in [-1 .. 1] is produced then
multiplied by eps, so the final range is in [-eps .. eps].
"""
if eps in [None]:
eps = self.gahw.alpha
rv = ((2.0 * random.random()) - 1.0) * eps
return rv
def setup(self):
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", np.ndarray, fitness=creator.FitnessMax)
self.toolbox = base.Toolbox()
self.pool = multiprocessing.Pool()
self.toolbox.register("map", self.pool.map)
#toolbox.register("attr_bool", random.randint, 0, 1) # non-numpy non-float version
# self.toolbox.register("attr_float", random.random)
#self.toolbox.register("attr_float", self.zero)
self.toolbox.register("attr_float", self.smallrandom)
self.toolbox.register("individual", tools.initRepeat, creator.Individual, self.toolbox.attr_float, n=self.weights_len)
self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
# self.toolbox.register("evaluate", self.gahw.evaluate)
self.toolbox.register("evaluate", self.gahw.multi_evaluate)
#toolbox.register("mate", tools.cxTwoPoint) # non-numpy non-float version
self.toolbox.register("mate", self.cxTwoPointCopy)
#toolbox.register("mutate", tools.mutFlipBit, indpb=0.05) # non-numpy non-float version
self.toolbox.register("mutate", tools.mutGaussian, mu=self.mut_center, sigma=self.mut_sigma, indpb=self.mut_indpb)
self.toolbox.register("select", tools.selTournament, tournsize=self.tournsize, k=self.tournk)
def normalize_fitnesses(self, fitnesses):
#print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
maxfitness = np.max([x[0] for x in fitnesses])
#print("maxfitness", maxfitness)
listfit = [x[0] for x in fitnesses]
#print("listfit", listfit)
normfit = [x/maxfitness for x in listfit]
#print("normfit", normfit)
fitnesses = [tuple([x]) for x in normfit]
#print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses])
return fitnesses
def log_it(self, generation):
pool = self.pool
toolbox = self.toolbox
self.pool = None
self.toolbox = None
        pklfn = f"{self.logbase}__{generation+1}-{self.maxgenerations}.pkl"
        with open(pklfn, "wb") as fh:
            pickle.dump(self, fh)
        self.pool = pool
        self.toolbox = toolbox
def loop(self):
self.population = self.toolbox.population(n=self.popsize)
#print(self.masv(self.population))
NGEN=self.maxgenerations
for gen in range(NGEN):
print("generation", gen)
offspring = algorithms.varAnd(self.population, self.toolbox, cxpb=self.cxpb, mutpb=self.mtpb)
# print("offspring", offspring)
            # constrain genome values to [wmin, wmax] (assign back in place;
            # np.clip alone returns a new array and has no effect here)
            for offspring_i, individual in enumerate(offspring):
                offspring[offspring_i][:] = np.clip(individual, self.wmin, self.wmax)
# print("clipped offspring", offspring)
# Evaluate the individuals with an invalid fitness (not yet evaluated)
# print("check fitness.valid")
invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
# print("invalid_ind", len(invalid_ind))
#print("setting fitness")
fitnesses = self.toolbox.map(self.toolbox.evaluate, invalid_ind)
if self.normalize_fitness:
fitnesses = self.normalize_fitnesses(fitnesses)
"""
#print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
maxfitness = np.max([x[0] for x in fitnesses])
#print("maxfitness", maxfitness)
listfit = [x[0] for x in fitnesses]
#print("listfit", listfit)
normfit = [x/maxfitness for x in listfit]
#print("normfit", normfit)
fitnesses = [tuple([x]) for x in normfit]
#print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses])
"""
# print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
self.fitness_dist(fitnesses)
# print("update ind fitness")
for ind, fit in zip(invalid_ind, fitnesses):
ind.fitness.values = fit
#print("selection")
#print("offspring\n", self.masv(offspring))
self.offspring = offspring
self.population = self.toolbox.select(offspring, k=len(self.population))
if 0 == gen % 100:
self.log_it(gen)
#print("population after selection\n", self.masv(self.population))
#print("Report for generation", gen)
self.report()
def report(self):
# post-evolution analysis
fitnesses = self.toolbox.map(self.toolbox.evaluate, self.population)
if self.normalize_fitness:
fitnesses = self.normalize_fitnesses(fitnesses)
self.fitnesses = fitnesses
self.sortedFitnesses = sorted(fitnesses)
self.sortedFitnesses.reverse()
self.fitness_dist(fitnesses)
self.bestFitness, self.worstFitness = self.sortedFitnesses[0], self.sortedFitnesses[-1]
print("best/worst w", self.bestFitness, self.worstFitness)
self.bestGenome = tools.selBest(self.population, k=1)
# print(self.bestGenome)
def ffmt(self, value, fmt="%3.2f"):
return fmt % value
def fitness_dist(self, fitnesses):
listfit = [x[0] for x in fitnesses]
        pct05, pct25, pct50, pct75, pct95 = np.percentile(listfit, [5, 25, 50, 75, 95])
print(f"fitness dist: {self.ffmt(np.min(listfit))} {self.ffmt(pct05)} {self.ffmt(pct25)} {self.ffmt(pct50)} {self.ffmt(pct75)} {self.ffmt(pct95)} {self.ffmt(np.max(listfit))}")
def driver(self):
# Initialize
self.setup()
# Generation loop
self.loop()
# Report
self.report()
self.log_it(self.maxgenerations)
print(self.masv(self.population))
pass
def holes_block_direct_route():
# GridWorld as in 'gridworld.ipynb'
gahw = GoalsAndHolesWorld((4,12), (4,), (3,11), [[3,3],[1,10]], (3,0), Agent, max_steps=200)
ew = EvolveWeights(gahw, popsize=100, maxgenerations=10000, tournsize=75, tournk=3, normalize_fitness=False)
ew.driver()
def holes_block_direct_route_two_goals():
# GridWorld as in 'gridworld.ipynb'
gahw = GoalsAndHolesWorld((4,13), (4,), (3,12), [[3,3],[1,11]], (2,6), Agent, max_steps=200)
gahw.add_map((4,13), (4,), (3,0), [[3,3],[1,11]], (2,6))
ew = EvolveWeights(gahw, popsize=100, maxgenerations=100, tournsize=75, tournk=3, normalize_fitness=False)
ew.driver()
def holes_block_direct_route_two_goals_left():
# GridWorld as in 'gridworld.ipynb'
gahw = GoalsAndHolesWorld((4,13), (4,), (3,0), [[3,3],[1,11]], (2,6), Agent, max_steps=200)
gahw.add_map((4,13), (4,), (3,0), [[3,3],[1,11]], (2,6))
ew = EvolveWeights(gahw, popsize=100, maxgenerations=100, tournsize=75, tournk=3, normalize_fitness=False)
ew.driver()
def holes_block_direct_route_two_goals_right():
# GridWorld as in 'gridworld.ipynb'
gahw = GoalsAndHolesWorld((4,13), (4,), (3,12), [[3,3],[1,11]], (2,6), Agent, max_steps=200)
gahw.add_map((4,13), (4,), (3,12), [[3,3],[1,11]], (2,6))
ew = EvolveWeights(gahw, popsize=100, maxgenerations=100, tournsize=75, tournk=3, normalize_fitness=False)
ew.driver()
def maxave():
ma = MaxAve()
ew = EvolveWeights(ma, popsize = 100, maxgenerations=500)
ew.driver()
if __name__ == "__main__":
#holes_block_direct_route()
print("Two different goals")
holes_block_direct_route_two_goals()
print("Two environments, both have goal on left.")
holes_block_direct_route_two_goals_left()
print("Two environments, both have goal on right.")
holes_block_direct_route_two_goals_right()
# maxave()
pass

328
code/pe.py Executable file

@@ -0,0 +1,328 @@
"""
pe.py
puzzles evolving
"""
import sys
# allow importing from the 'code/' dir
sys.path.append("../code")
import os
import platform
import pickle
import json
import traceback
import datetime
import copy
import numpy as np, itertools, copy
import matplotlib.pyplot as plt
from collections import defaultdict
import importlib # module reloading
import environments
import agents
# always forces a reload in case you have edited environments or agents
importlib.reload(environments)
importlib.reload(agents)
#from environments.gridworld import GridWorld
import environments.puzzle as pz
from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace
from agents.q_agent import EvolvableAgent as Agent
# DEAP imports
import random
from deap import creator, base, tools, algorithms
import multiprocessing
#pool = multiprocessing.Pool()
#toolbox.register("map", pool.map)
# Weight handling
from mda import MultiDimArray
def isotime():
return datetime.datetime.now().isoformat()
def t2fn(timestamp):
timestamp = timestamp.replace('.','_')
timestamp = timestamp.replace(':','_')
return timestamp
class Holder(object):
def __init__(self):
pass
class EvolveWeights(object):
"""
Class to apply DEAP to evolve a population consisting of a set
of weights.
"""
def __init__(self, environ,
popsize=100, maxgenerations=10000,
cxpb=0.5, mtpb=0.05,
wmin=-20.0, wmax=20.0,
mut_center=0.0, mut_sigma=0.1, mut_indpb=0.05,
tournsize=5,
tournk=2,
normalize_fitness=True,
tag='environ'
):
self.tag = tag
self.starttime = isotime()
self.logbase = tag + "_" + t2fn(self.starttime)
self.environ = environ
self.weights_len = environ.get_weights_len()
self.popsize = popsize
self.maxgenerations = maxgenerations
self.cxpb = cxpb
self.mtpb = mtpb
self.wmin = wmin
self.wmax = wmax
self.mut_center = mut_center
self.mut_sigma = mut_sigma
self.mut_indpb = mut_indpb
self.tournsize = tournsize
self.tournk = tournk
self.normalize_fitness = normalize_fitness
pass
def masv(self, pop):
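        """Mean absolute weight per individual, scaled by the population-wide
        maximum absolute weight; a rough per-individual magnitude summary."""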
mav = []
maxs = []
for ind in pop:
wts = [x for x in ind]
mav.append(np.mean(np.abs(wts)))
maxs.append(np.max(np.abs(wts)))
allmax = np.max(maxs)
mymasv = [x/allmax for x in mav]
return mymasv
def cxTwoPointCopy(self, ind1, ind2):
"""Execute a two points crossover with copy on the input individuals. The
copy is required because the slicing in numpy returns a view of the data,
which leads to a self overwriting in the swap operation. It prevents
::
>>> import numpy as np
>>> a = np.array((1,2,3,4))
>>> b = np.array((5,6,7,8))
>>> a[1:3], b[1:3] = b[1:3], a[1:3]
>>> print(a)
[1 6 7 4]
>>> print(b)
[5 6 7 8]
"""
size = len(ind1)
cxpoint1 = random.randint(1, size)
cxpoint2 = random.randint(1, size - 1)
if cxpoint2 >= cxpoint1:
cxpoint2 += 1
else: # Swap the two cx points
cxpoint1, cxpoint2 = cxpoint2, cxpoint1
ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = ind2[cxpoint1:cxpoint2].copy(), ind1[cxpoint1:cxpoint2].copy()
return ind1, ind2
def zero(self):
return 0.0
def smallrandom(self, eps=None):
"""
Produce a small random number in [-eps .. eps].
A random variate in [-1 .. 1] is produced then
multiplied by eps, so the final range is in [-eps .. eps].
"""
if eps in [None]:
eps = self.environ.alpha
rv = ((2.0 * random.random()) - 1.0) * eps
return rv
def setup(self):
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", np.ndarray, fitness=creator.FitnessMax)
self.toolbox = base.Toolbox()
self.pool = multiprocessing.Pool()
self.toolbox.register("map", self.pool.map)
#toolbox.register("attr_bool", random.randint, 0, 1) # non-numpy non-float version
# self.toolbox.register("attr_float", random.random)
#self.toolbox.register("attr_float", self.zero)
self.toolbox.register("attr_float", self.smallrandom)
self.toolbox.register("individual", tools.initRepeat, creator.Individual, self.toolbox.attr_float, n=self.weights_len)
self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
self.toolbox.register("evaluate", self.environ.evaluate)
#toolbox.register("mate", tools.cxTwoPoint) # non-numpy non-float version
self.toolbox.register("mate", self.cxTwoPointCopy)
#toolbox.register("mutate", tools.mutFlipBit, indpb=0.05) # non-numpy non-float version
self.toolbox.register("mutate", tools.mutGaussian, mu=self.mut_center, sigma=self.mut_sigma, indpb=self.mut_indpb)
self.toolbox.register("select", tools.selTournament, tournsize=self.tournsize, k=self.tournk)
def normalize_fitnesses(self, fitnesses):
#print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
maxfitness = np.max([x[0] for x in fitnesses])
#print("maxfitness", maxfitness)
listfit = [x[0] for x in fitnesses]
#print("listfit", listfit)
normfit = [x/maxfitness for x in listfit]
#print("normfit", normfit)
fitnesses = [tuple([x]) for x in normfit]
#print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses])
return fitnesses
def log_it(self, generation):
pool = self.pool
toolbox = self.toolbox
self.pool = None
self.toolbox = None
        pklfn = f"{self.logbase}__{generation+1}-{self.maxgenerations}.pkl"
        with open(pklfn, "wb") as fh:
            pickle.dump(self, fh)
        self.pool = pool
        self.toolbox = toolbox
def loop(self):
self.population = self.toolbox.population(n=self.popsize)
#print(self.masv(self.population))
NGEN=self.maxgenerations
for gen in range(NGEN):
print("generation", gen)
offspring = algorithms.varAnd(self.population, self.toolbox, cxpb=self.cxpb, mutpb=self.mtpb)
# print("offspring", offspring)
            # constrain genome values to [wmin, wmax] (assign back in place;
            # np.clip alone returns a new array and has no effect here)
            for offspring_i, individual in enumerate(offspring):
                offspring[offspring_i][:] = np.clip(individual, self.wmin, self.wmax)
# print("clipped offspring", offspring)
# Evaluate the individuals with an invalid fitness (not yet evaluated)
# print("check fitness.valid")
invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
# print("invalid_ind", len(invalid_ind))
#print("setting fitness")
fitnesses = self.toolbox.map(self.toolbox.evaluate, invalid_ind)
if self.normalize_fitness:
fitnesses = self.normalize_fitnesses(fitnesses)
"""
#print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
maxfitness = np.max([x[0] for x in fitnesses])
#print("maxfitness", maxfitness)
listfit = [x[0] for x in fitnesses]
#print("listfit", listfit)
normfit = [x/maxfitness for x in listfit]
#print("normfit", normfit)
fitnesses = [tuple([x]) for x in normfit]
#print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses])
"""
print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
self.fitness_dist(fitnesses)
# print("update ind fitness")
for ind, fit in zip(invalid_ind, fitnesses):
ind.fitness.values = fit
#print("selection")
#print("offspring\n", self.masv(offspring))
self.offspring = offspring
self.population = self.toolbox.select(offspring, k=len(self.population))
if 0 == gen % 100:
self.log_it(gen)
#print("population after selection\n", self.masv(self.population))
#print("Report for generation", gen)
self.report()
def report(self):
# post-evolution analysis
fitnesses = self.toolbox.map(self.toolbox.evaluate, self.population)
if self.normalize_fitness:
fitnesses = self.normalize_fitnesses(fitnesses)
self.fitnesses = fitnesses
self.sortedFitnesses = sorted(fitnesses)
self.sortedFitnesses.reverse()
self.fitness_dist(fitnesses)
self.bestFitness, self.worstFitness = self.sortedFitnesses[0], self.sortedFitnesses[-1]
print("best/worst w", self.bestFitness, self.worstFitness)
self.bestGenome = tools.selBest(self.population, k=1)
# print(self.bestGenome)
def ffmt(self, value, fmt="%3.2f"):
return fmt % value
def fitness_dist(self, fitnesses):
listfit = [x[0] for x in fitnesses]
        pct05, pct25, pct50, pct75, pct95 = np.percentile(listfit, [5, 25, 50, 75, 95])
print(f"fitness dist: {self.ffmt(np.min(listfit))} {self.ffmt(pct05)} {self.ffmt(pct25)} {self.ffmt(pct50)} {self.ffmt(pct75)} {self.ffmt(pct95)} {self.ffmt(np.max(listfit))}")
def driver(self):
# Initialize
self.setup()
# Generation loop
self.loop()
# Report
self.report()
self.log_it(self.maxgenerations)
print(self.masv(self.population))
pass
def puzzles_exp_1():
'''full test of the conveyorbelt world
>>> import copy
>>> maxrewards = [1]
>>> easy_features = [[0,1],[0,1],[3,1],[0,0]]
>>> easy_rewards = [-1,-1,-1,1]
>>> easy_tt = np.array([[0,0,2,3], [0,0,0,0], [2,0,2,3], [3,3,3,3]])
>>> p1 = Puzzle(tt=easy_tt, features=easy_features, rewards=easy_rewards)
>>> p2 = copy.deepcopy(p1)
>>> puzzles = (p1,p2)
>>> world = ConvBelt(actionSpace = getActionSpace(puzzles), observationSpace = getObservationSpace(puzzles), maxRewards = maxrewards, randomize = False)
>>> world.append(p1)
>>> world.append(p2)
:
'''
maxrewards = [1]
easy_features = [[0,1],[0,1],[3,1],[0,0]]
easy_rewards = [-1,-1,-1,1]
easy_tt = np.array([[0,0,2,3], [0,0,0,0], [2,0,2,3], [3,3,3,3]])
p1 = Puzzle(tt=easy_tt, features=easy_features, rewards=easy_rewards)
p2 = copy.deepcopy(p1)
puzzles = (p1, p2)
world = ConvBelt(actionSpace = getActionSpace(puzzles),
observationSpace = getObservationSpace(puzzles),
maxRewards = maxrewards,
agentclass=Agent,
randomize = False, alpha=0.005)
world.append(p1)
world.append(p2)
environ = Holder()
environ.world = world
ew = EvolveWeights(world, popsize=100, maxgenerations=1000, tournsize=75, tournk=3, normalize_fitness=False)
ew.driver()
if __name__ == "__main__":
print("pe.py start...")
puzzles_exp_1()
print("pe.py done.")

254
code/ress.py Normal file
View File

@@ -0,0 +1,254 @@
"""RESS.py
Random Equal Stimulus Sets
Originally coded in Object Pascal for Delphi by Wesley R. Elsberry
around 1999.
Translation to Python 3 by ChatGPT (GPT-4) 2023-06-01.
Random Equal Stimulus Sets are sequences of numbers indicating one of
a set of stimuli to be presented to a subject in a cognitive or
psychophysics task. The basic rules for generating these sequences is
derived from Gellermann 1925(?), but modified to permit the
specification of more than two stimuli in the set. The restriction on
a maximum of three sequential presentations of the same stimulus is
retained.
Issues:
The 'next_yield' method does not work.
Using 'next' for a sequence longer than the defined length of
sequence can cause there to be sequences that violate Gellermann's
assumptions, as the sequences composed together are not tested
across the joins.
"""
import sys
import os
import traceback
import random
MAXRESS = 120 # Arbitrary maximum
class RESS:
"""
RESS class represents the equivalent of the Pascal unit 'ress' in Python.
Random Equal Stimulus Sets are sequences of numbers indicating one of
a set of stimuli to be presented to a subject in a cognitive or
    psychophysics task. The basic rules for generating these sequences are
    derived from Gellermann (1933), but modified to permit the
specification of more than two stimuli in the set. The restriction on
a maximum of three sequential presentations of the same stimulus is
retained.
"""
def __init__(self):
self.classes = None
self.thelength = None
self.series = [0] * MAXRESS
self.lastseries = [0] * MAXRESS
self.cnt = None
self.seriesstr = ""
self.current = None
self.dummy = None
self.hist = [0] * 61
def init(self):
"""
Initializes the variables in TRESS.
"""
self.classes = 1
self.thelength = 0
self.series = [0] * MAXRESS
self.lastseries = [0] * MAXRESS
self.hist = [0] * 61
self.cnt = 0
self.seriesstr = ""
self.dummy = 0
def makestring(self):
"""
Creates a string representation of the series.
Returns:
The string representation of the series.
"""
tstr = ""
        # generate() fills indices 0..thelength-1, so include index 0 and stop at thelength
        for val in self.series[:self.thelength]:
tstr += str(val)
self.seriesstr = tstr
return tstr
    def generate(self, nlen, nclass):
        """
        Generates a candidate series.
        Args:
            nlen: The length of the series.
            nclass: The number of classes.
        """
        self.cnt = 0
        self.classes = nclass
        # Constraint: sequence length no greater than the maximum
        self.thelength = min(nlen, MAXRESS)
# Constraint: Multiple of number of classes
if self.thelength % self.classes != 0:
self.thelength -= self.thelength % self.classes
for i in range(self.classes):
self.hist[i] = self.thelength // self.classes
self.series[0] = random.randint(0, self.classes - 1)
self.hist[self.series[0]] -= 1
run = 1
for i in range(1, self.thelength):
ctr = 0
while True:
ctr += 1
jj = random.randint(0, self.classes - 1)
                # accept jj only if that class is still available and would not extend a run past 3
                if self.hist[jj] > 0 and (self.series[i - 1] != jj or run < 3):
                    break
if ctr > 100:
break
if self.series[i - 1] == jj:
run += 1
else:
run = 1
self.hist[jj] -= 1
self.series[i] = jj
def test(self):
"""
Tests candidates for criteria.
Returns:
True if the series is valid, False otherwise.
"""
ok = True
hist = [0] * 61
for val in self.series[:self.thelength]:
hist[val] += 1
for i in range(self.classes - 1):
if hist[i] != hist[i + 1]:
ok = False
if ok:
run = 1
for i in range(1, self.thelength):
if self.series[i - 1] == self.series[i]:
run += 1
if run > 3:
ok = False
else:
run = 1
return ok
def newress(self, nlen=24, nclass=2):
"""
Finds and saves a valid series using generate and test.
Args:
nlen: The length of the series.
nclass: The number of classes.
"""
print('nlen', nlen, 'nclass', nclass)
try:
random.seed()
            self.lastseries = self.series[:]  # copy, not alias, so the previous series is preserved
while True:
self.generate(nlen, nclass)
# print("gen", self.makestring())
if self.test():
break
return self.makestring()
        except Exception:
estr = f"Error: {traceback.format_exc()}"
print(estr)
return ''
def next(self):
"""
Returns the next value within a series.
Returns:
The next value in the series.
"""
        if self.cnt >= self.thelength:
            # regenerate; generate() resets self.cnt to 0
            self.newress(self.thelength, self.classes)
        # return the value at the current index, then advance
        self.current = self.series[self.cnt]
        self.cnt += 1
        return self.current
    def next_yield(self):
        """
        Generator yielding successive values from the series, regenerating a
        fresh series when the current one is exhausted. Iterate the returned
        generator; do not call it like 'next'.
        """
        while True:
            if self.cnt >= self.thelength:
                self.newress(self.thelength, self.classes)
                self.cnt = 0
            self.current = self.series[self.cnt]
            yield str(self.current)
            self.cnt += 1
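# Hypothetical helper (a sketch, not part of the original unit): the module
# docstring notes that composed sequences are not tested across the joins;
# this checks the run constraint over a concatenation of two series.
def runs_ok_across_join(first, second, maxrun=3):
    """Return True if concatenating two series keeps every run <= maxrun."""
    joined = list(first) + list(second)
    run = 1
    for i in range(1, len(joined)):
        if joined[i] == joined[i - 1]:
            run += 1
            if run > maxrun:
                return False
        else:
            run = 1
    return True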
# Exercise the RESS code
from random import seed
def main():
# Set the seed for random number generation
seed()
    # Create and initialize an instance of the RESS class
    ress1 = RESS()
    ress1.init()
    # Generate and print several valid series
    for _ in range(5):
        ress1.newress(24, 3)
        series = ress1.makestring()
        print("Generated Series:", series)
# Generate and print the next value in the series
for ii in range(26):
next_val = ress1.next()
print(ii, "Next Value:", str(next_val))
if __name__ == "__main__":
main()

1
deactivate_env.sh Normal file
View File

@@ -0,0 +1 @@
micromamba deactivate

8
jupyter.sh Normal file
View File

@@ -0,0 +1,8 @@
UMAMBA_PATH="umamba_env"
if [ ! -d "$UMAMBA_PATH" ]; then
echo "no $UMAMBA_PATH found"
. ./update_env.sh
fi
. ./activate_env.sh
micromamba activate curio
jupyter-lab

138
notebooks/gridworld.ipynb Normal file
View File

@@ -0,0 +1,138 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "bf316089-5339-4ac8-b0e2-3618fe06a593",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np, itertools, copy\n",
"import matplotlib.pyplot as plt\n",
"from collections import defaultdict\n",
"import importlib # module reloading\n",
"\n",
"# allow importing from the 'code/' dir\n",
"import sys\n",
"sys.path.append(\"../code\")\n",
"\n",
"import environments\n",
"import agents\n",
"# always forces a reload in case you have edited environments or agents\n",
"importlib.reload(environments)\n",
"importlib.reload(agents)\n",
"from environments.gridworld import GridWorld\n",
"from agents.q_agent import Agent\n",
"\n",
"# problem domain dependent settings\n",
"dims = [4,12]\n",
"obsSpace, actSpace = (dims[0], dims[1]), (4,)\n",
"num_trials=1000\n",
"n_actions = 4\n",
"#(optimal lmbda in the agent is domain dependent - could be evolved)\n",
"HARD_TIME_LIMIT = 50\n",
"KILLED_REWARD = -10\n",
"#(standard reward) = -1.0 (means agent is potentially wasting time - set internal to agent code)\n",
"#(goal reward) = 1.0 (means the agent achieved something good - set internal to agent code)\n",
"\n",
"# create our own GridWorld that adheres to openAI-gym environment API during training\n",
"env = GridWorld(dims = dims, startState = [3,0])\n",
"\n",
"# 4rows x 12columns (0,0) is top-left\n",
"# -: empty location\n",
"# S: Start location\n",
"# G: Goal location\n",
"# x: immediate fail (a hole / cliff)\n",
"#\n",
"# (map of grid world)\n",
"# ------------\n",
"# ------------\n",
"# ------------\n",
"# SxxxxxxxxxxG\n",
"\n",
"# add goals and holes\n",
"# supports multiple goals, use 1 for now\n",
"env.goals.append([3,11])\n",
"# support multiple 'kill zones' (cliff edge, in openAI parlance)\n",
"for i in range(1,11):\n",
" env.holes.append([3,i])\n",
" \n",
"agent = Agent(obsSpace=obsSpace, actSpace=actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.42)\n",
"# alpha # how much to weigh reward surprises that deviate from expectation\n",
"# gamma # how important exepcted rewards will be\n",
"# epsilon # fraction of exploration to exploitation (how often to choose a random action)\n",
"# lmbda # how slowly memory of preceeding actions fades away (1=never, 0=\n",
"\n",
"\n",
"time_to_solve_each_trial = [] # lower is better\n",
"for trialN in range(num_trials):\n",
" # some output to see it running\n",
" if (trialN % 10) == 0: print('.',end='')\n",
" # initialize the agent, environment, and time for this trial\n",
" agent.reset() # soft-reset() (keeps learned weights)\n",
" nextState = env.reset()\n",
" time = 0\n",
" while True:\n",
" time += 1\n",
" # set agent senses based on environment and allow agent to determine an action\n",
" agent.sensoryState = nextState\n",
" agent.plasticUpdate()\n",
" # determine effect on environment state & any reward (in standard openAI-gym API format)\n",
" nextState, reward, goal_achieved, _ = env.step(agent.action)\n",
" agent.reward = reward\n",
" if goal_achieved or time == HARD_TIME_LIMIT: break\n",
" # stop trial if agent explitly failed early\n",
" elif reward <= KILLED_REWARD:\n",
" agent.sensoryState = nextState\n",
" agent.reward = reward\n",
" agent.plasticUpdate() # allow 1 more update to 'learn' the bad reward\n",
" agent.reset()\n",
" nextState = env.reset()\n",
" # record trial results\n",
" time_to_solve_each_trial.append(time)\n",
" \n",
"print()\n",
"plt.plot(time_to_solve_each_trial);\n",
"pt=15 # font point\n",
"plt.title('Time until agent solved trial', fontsize=pt)\n",
"plt.xlabel('Trial', fontsize=pt)\n",
"plt.ylabel('Time', fontsize=pt)\n",
"\n",
"# show path agent took in GridWorld using non-learning agent (staticUpdate())\n",
"print(\"green dot: start location\")\n",
"print(\"red dot: finish location\")\n",
"env.render(agent)\n",
"#render(agent,env)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d54a622f-42e4-4384-bf9a-0f0181301c3c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

162
notebooks/puzzlev0.ipynb Normal file
View File

@@ -0,0 +1,162 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "b067867a-c1bc-4769-a6ac-15e7277ab8e2",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import numpy as np, itertools, copy\n",
"import matplotlib.pyplot as plt\n",
"from collections import defaultdict\n",
"import importlib # module reloading\n",
"\n",
"# allow importing from the 'code/' dir\n",
"import sys\n",
"sys.path.append(\"../code\")\n",
"\n",
"import environments\n",
"import agents\n",
"# always forces a reload in case you have edited environments or agents\n",
"importlib.reload(environments)\n",
"importlib.reload(agents)\n",
"from environments.puzzle import Puzzle, ConvBelt, Action, getActionSpace, getObservationSpace\n",
"from agents.q_agent import Agent\n",
"\n",
"import copy # allows duplicating puzzles into unique puzzles, otherwise python refs are shallow-copied\n",
"maxrewards = [1] # could have multiple levels of 'goodness'\n",
"\n",
"# Create a puzzle with 4 states:\n",
"# state 0: first presentation\n",
"# state 1: getting passed over, advancing on belt (not really a state, more a placeholder)\n",
"# state 2: investigated (more sensory information is available when examined closely)\n",
"# state 3: consumed (saturating state with possible reward)\n",
"easy_puzzle_tt = np.array([[0,0,2,3], # state 0: first presentation\n",
" [0,0,0,0], # state 1: getting passed over (placeholder)\n",
" [2,0,2,3], # state 2: investigated\n",
" [3,3,3,3]]) # state 3: consumed\n",
"# example puzzle with 2 sensorial dimensions\n",
"easy_puzzle_features = [[0,1], # state 0: Empty/Unknown & Spikes\n",
" [0,1], # state 1: Empty/Unknown & Spikes\n",
" [3,1], # state 2: Red & Spikes\n",
" [0,0]] # state 3: Empty/Unknown & Empty/Unknown\n",
"easy_puzzle_rewards = [-1, # state 0: first look\n",
" -1, # state 1: proceeding to next puzzle (placeholder)\n",
" -1, # state 2: investigate\n",
" 1] # state 3: consume (could be -10 poisonous! or -1 empty/useless)\n",
"p1 = Puzzle(tt = easy_puzzle_tt,\n",
" features = easy_puzzle_features,\n",
" rewards = easy_puzzle_rewards)\n",
"p2 = copy.deepcopy(p1)\n",
"puzzles = (p1,p2)\n",
"\n",
"\n",
"obsSpace = getObservationSpace(puzzles)\n",
"actSpace = getActionSpace(puzzles)\n",
"\n",
"\n",
"env = ConvBelt(actionSpace = getActionSpace(puzzles), # indicate number of actions agent can take\n",
" observationSpace = getObservationSpace(puzzles), # indicate number of sensorial dimensions and sizes\n",
" maxRewards = maxrewards, # rewards that constitute postive rewards\n",
" randomize = False, # randomize puzzle positions on belt at each reset()\n",
" )\n",
"\n",
"# can use append() or extend()\n",
"env.append(p1)\n",
"env.append(p2)\n",
"\n",
"# domain-specific settings\n",
"num_trials=200\n",
"n_actions = 4\n",
"#(optimal lmbda in the agent is domain dependent - could be evolved)\n",
"HARD_TIME_LIMIT = 600\n",
"#KILLED_REWARD = -10 # not used here\n",
"#(standard reward) = -1.0 (means agent is potentially wasting time - set internal to agent code)\n",
"#(goal reward) = 1.0 (means the agent achieved something good - set internal to agent code)\n",
"\n",
"agent = Agent(obsSpace=obsSpace, actSpace=actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.42)\n",
"# alpha # how much to weigh reward surprises that deviate from expectation\n",
"# gamma # how important exepcted rewards will be\n",
"# epsilon # fraction of exploration to exploitation (how often to choose a random action)\n",
"# lmbda # how slowly memory of preceeding actions fades away (1=never, 0=\n",
"\n",
"time_to_solve_each_trial = []\n",
"rewards = []\n",
"\n",
"for trialN in range(num_trials):\n",
" # some output to see it running\n",
" if (trialN % 10) == 0: print('.',end='')\n",
" # initialize the agent, environment, and time for this trial\n",
" agent.reset() # soft-reset() (keeps learned weights)\n",
" nextState = env.reset()\n",
" time = 0\n",
" while True:\n",
" time += 1\n",
" # set agent senses based on environment and allow agent to determine an action\n",
" agent.sensoryState = nextState\n",
" agent.plasticUpdate()\n",
" # determine effect on environment state & any reward (in standard openAI-gym API format)\n",
" nextState, reward, goal_achieved, _ = env.step(agent.action)\n",
" agent.reward = reward\n",
" if env.puzzlesLeftToComplete == 0 or time == HARD_TIME_LIMIT:\n",
" agent.plasticUpdate()\n",
" break\n",
" # could have deadly rewards that stop the trial early\n",
" #elif reward <= -10:\n",
" # agent.sensoryState = nextState\n",
" # agent.reward = reward\n",
" # agent.plasticUpdate()\n",
" # agent.reset()\n",
" # nextState = env.reset()\n",
" rewards.append(reward)\n",
" time_to_solve_each_trial.append(time)\n",
" \n",
" \n",
"print()\n",
"print(list(agent.weights.round(3)))\n",
"#print(agent.timeSinceBigSurprise)\n",
"plt.figure(figsize=(16,4),dpi=200)\n",
"plt.plot(time_to_solve_each_trial)\n",
"pt=15 # font point\n",
"plt.title('Time until agent solved trial (puzzle boxes)', fontsize=pt)\n",
"plt.xlabel('Trial', fontsize=pt)\n",
"plt.ylabel('Time', fontsize=pt)\n",
"#figure()\n",
"#plot(rewards)\n",
"env.render(agent);"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0e22a5e6-47fb-45c0-905f-3fb5b6cc3980",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

40
papers/bibliography.bib Normal file
View File

@@ -0,0 +1,40 @@
% uses machine learning to facilitate automatic olfactory classification.
% Intro discusses how smells are smelled.
% PDF: https://arxiv.org/pdf/1906.07067
@article{imam2020rapid,
title={Rapid online learning and robust recall in a neuromorphic olfactory circuit},
author={Imam, Nabil and Cleland, Thomas A},
journal={Nature Machine Intelligence},
volume={2},
number={3},
pages={181--191},
year={2020},
publisher={Nature Publishing Group}
}
% PDF: https://search.proquest.com/docview/1297102848?pq-origsite=gscholar&imgSeq=1
@article{gellermann1933chance,
title={Chance orders of alternating stimuli in visual discrimination experiments},
author={Gellermann, Louis W},
journal={The journal of genetic psychology},
volume={42},
pages={206--208},
year={1933},
publisher={Journal Press, etc.}
}
% PDF: https://static1.squarespace.com/static/5b82081250a54f02ee0758c8/t/5b8ed5a04fa51a484aa907ee/1536087459872/tinbergen+original.pdf
% Also uploaded to repository.
@article{Tinbergen1963Jan,
author = {Tinbergen, N.},
title = {{On aims and methods of Ethology}},
journal = {Z. Tierpsychol.},
volume = {20},
number = {4},
pages = {410--433},
year = {1963},
month = {Jan},
issn = {0044-3573},
publisher = {John Wiley {\&} Sons, Ltd},
doi = {10.1111/j.1439-0310.1963.tb01161.x}
}

BIN
papers/narrative.pdf Normal file

Binary file not shown.

7
requirements-conda.txt Normal file
View File

@@ -0,0 +1,7 @@
python=3.11
jupyter
numpy
matplotlib
plotnine
nodejs
deap

1
requirements-pip.txt Normal file
View File

@@ -0,0 +1 @@
jupyterlab

47
update_env.sh Normal file
View File

@@ -0,0 +1,47 @@
OS="linux"
if [[ "$OSTYPE" == "darwin"* ]]; then
OS="osx"
fi
ARCH="64"
if [[ "$(uname -m)" == "aarch64" ]]; then
if [[ "$OS" == "osx" ]]; then
ARCH="arm64"
else
ARCH="aarch64"
fi
fi
SYSTEM="$OS-$ARCH"
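# e.g. SYSTEM resolves to "linux-64", "linux-aarch64", "osx-64", or "osx-arm64"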
# conda deactivate in case they have a conda env
# micromamba deactivate in case they have a micromamba env
conda deactivate &>/dev/null
micromamba deactivate &>/dev/null
UMAMBA_PATH="umamba_env"
if [ ! -d "umamba_env" ]; then
# download micromamba
echo "downloading micromamba to $UMAMBA_PATH/ ..."
curl -Ls https://micro.mamba.pm/api/micromamba/${SYSTEM}/latest | tar -xvj bin/micromamba
mv bin $UMAMBA_PATH
# activate micromamba
export MAMBA_ROOT_PREFIX=$PWD/$UMAMBA_PATH
eval "$(./umamba_env/micromamba shell hook -s posix)"
# create the project environment
echo "creating 'curio' environment"
micromamba create -n curio -c conda-forge
micromamba activate curio
else
    echo "found micromamba at $UMAMBA_PATH"
    # the shell hook must be set up before 'micromamba activate' works in a fresh shell
    export MAMBA_ROOT_PREFIX=$PWD/$UMAMBA_PATH
    eval "$(./$UMAMBA_PATH/micromamba shell hook -s posix)"
    micromamba activate curio
fi
echo "installing packages"
# install conda requirements
micromamba install --yes $(tr '\n' ' ' < requirements-conda.txt) -c conda-forge
# install pip requirements
pip install --no-input -r requirements-pip.txt
micromamba deactivate