diff --git a/README.md b/README.md index b4ce220..2b14882 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,26 @@ -# alice +# curiosity + +### quickstart +* `. ./jupyter.sh` runs jupyter-lab (setting everything up if necessary) + +Note, run scripts using `source ` or `. ./` + +### helper scripts +* `. ./update_env.sh` creates or updates the project python environment +* `. ./activate_env.sh` activate the project environment (calling update if missing) +* `. ./deactivate_env.sh` deactivate the project environment +* `. ./jupyter.sh` runs jupyter-lab (calling activate for safety) + +### structure + +``` +├── code +│   ├── agents/ # agent algorithms +│   ├── environments/ # test environments +│   └── evolve.py # sample evolution code +├── notebooks/ # example notebooks +├── papers/ # useful shared docs +├── requirements-conda.txt # conda project dependencies +├── requirements-pip.txt # pip project dependencies (sometimes necessary) +``` -ALICE is a project to explore curiosity in a model incorporating both reinforcement learning and evolutionary processes. \ No newline at end of file diff --git a/activate_env.sh b/activate_env.sh new file mode 100644 index 0000000..d50d967 --- /dev/null +++ b/activate_env.sh @@ -0,0 +1,13 @@ +# conda deactivate in case they have a conda env +# micromamba deactivate in case they have a micromamba env +conda deactivate &> /dev/null +micromamba deactivate &>/dev/null + +UMAMBA_PATH="umamba_env" +if [ ! -d "$UMAMBA_PATH" ]; then + echo "no $UMAMBA_PATH found" + . ./update_env.sh +fi +export MAMBA_ROOT_PREFIX=$PWD/$UMAMBA_PATH +eval "$(./$UMAMBA_PATH/micromamba shell hook -s posix)" +micromamba activate curio diff --git a/code/__init__.py b/code/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/code/agents/q_agent.py b/code/agents/q_agent.py new file mode 100755 index 0000000..bc0a16b --- /dev/null +++ b/code/agents/q_agent.py @@ -0,0 +1,365 @@ +""" +q_agent.py +This submodule contains the Agent class, which implements a Q-learning agent with eligibility traces (TD-lambda). The agent learns to make decisions based on its sensory state and rewards received from the environment. The agent uses an epsilon-greedy action-selection strategy. + +Usage: +import q_agent + +Class: +Agent(obsSpace, actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.96) + +Attributes: +obsSpace (tuple): The shape of the observation space. +actSpace (tuple): The shape of the action space. +ftrSpace (tuple): The shape of the feature space. +n_features (int): The total number of features. +n_actions (int): The total number of actions. +weights (numpy.ndarray): The Q-function weights. +trace (numpy.ndarray): The eligibility trace for each feature. +featureToIndexMap (numpy.ndarray): A mapping from feature indices to the corresponding weights. +allActions (list): A list of all possible actions. +alpha (float): The learning rate for updating weights. +gamma (float): The discount factor for future rewards. +epsilon (float): The exploration rate for epsilon-greedy action selection. +lmbda (float): The decay factor for eligibility traces. +sensoryState (numpy.ndarray): The current sensory state of the agent. +previousSensoryState (numpy.ndarray): The previous sensory state of the agent. +action (int): The current action taken by the agent. +previousAction (int): The previous action taken by the agent. +episoden (int): The episode number the agent is in. +recentReset (bool): Indicates if the agent was recently reset. 
+ +Methods: +reset(): +Resets the agent's traces, sensory states, and actions. + +predictPayoffsForAllActions() -> List[float]: + Predicts the expected payoffs for all possible actions given the current sensory state. + +plasticUpdate(): + Updates the agent's Q-function weights and eligibility traces based on the current sensory state, action, and received reward. Uses epsilon-greedy action selection. + +staticUpdate(): + Updates the agent's action based on the current sensory state without updating weights or traces. Uses greedy action selection. + +Examples: +>>> from q_agent import Agent +>>> obsSpace, actSpace = (2, 2), (3,) +>>> agent = Agent(obsSpace=obsSpace, actSpace=actSpace) +""" + +import traceback + +import numpy as np +from collections import defaultdict +from typing import List, Tuple, Union + +from deap import creator, base, tools, algorithms + +LOGGING = False + +import logging, sys +logging.basicConfig(stream=sys.stdout,level=logging.INFO) +log = logging.getLogger() + +if not LOGGING: + # remove all logging functionality + for handler in log.handlers.copy(): + try: + log.removeHandler(handler) + except ValueError: # in case another thread has already removed it + pass + log.addHandler(logging.NullHandler()) + log.propagate = False + + +# The Agent class, similar to what +# is used in MABE. Note: this is unlike +# how standard RLML folks structure these +# algorithms. Here, we separate out concerns +# for modularity. A side-effect is that the +# update() (one cognitive step) receives the reward +# for the previous update-action. This means 1 extra +# update must be called if terminating. +class Agent(): + + + def __init__(i, obsSpace, actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.96): + i.obsSpace = np.array(obsSpace) + i.actSpace = np.array(actSpace) + i.ftrSpace = tuple(obsSpace)+tuple(actSpace) + i.n_features = np.prod(i.ftrSpace) + i.n_actions = actSpace[0] # not general + i.weights = np.zeros(i.n_features) + i.trace = np.zeros(i.n_features) + i.featureToIndexMap = np.arange(i.n_features).reshape(i.ftrSpace) + i.allActions = list(range(i.n_actions)) + # new + i.alpha = alpha # how much to weigh reward surprises that deviate from expectation + i.gamma = gamma # how important exepcted rewards will be + i.epsilon = epsilon # fraction of exploration to exploitation (how often to choose a random action) + i.lmbda = lmbda # how important preceeding actions are in learning adaptation + i.sensoryState = np.zeros(len(i.obsSpace),dtype=np.int32) + i.previousSensoryState = np.zeros(len(i.obsSpace),dtype=np.int32) + i.action = 0 + i.previousAction = 0 + i.episoden = 0 + i.recentReset = True + + + def reset(i): # only resets traces + log.info("resetting agent") + i.trace = np.zeros(i.n_features) + i.sensoryState = np.zeros(len(i.obsSpace),dtype=np.int32) + i.previousSensoryState = np.zeros(len(i.obsSpace),dtype=np.int32) + i.action = 0 + i.previousAction = 0 + i.reward = -1 + i.recentReset = True + + + def predictPayoffsForAllActions(i) -> List[float]: + '''combines current sensoryState and all possible actions to return all possible payoffs by action + >>> obsSpace, actSpace, ftrSpace = (2,2), (3,), (2,2)+(3,) + >>> i = Agent(obsSpace=obsSpace, actSpace=actSpace) + >>> (i.featureToIndexMap == np.arange(i.n_features).reshape((2,2,3))).all() + True + >>> i.sensoryState[:] = [1,0] + >>> i.weights = np.zeros(12) + >>> i.weights[6:9] = [1.,2.,3.] 
# weights associated with features (1,0,) with actions 0,1,2 + >>> i.predictPayoffsForAllActions() + [1.0, 2.0, 3.0] + ''' + #print(i.sensoryState, i.allActions) + try: + featureKeys = [tuple(i.sensoryState)+(action,) for action in i.allActions] + # featuresForEachAction = [i.featureToIndexMap[tuple(i.sensoryState)+(action,)] for action in i.allActions] + featuresForEachAction = [i.featureToIndexMap[fki] for fki in featureKeys] + #print('featureToIndexMap', i.featureToIndexMap) + #print('featureKeys', featureKeys) + #print('sensoryState', i.sensoryState, 'allActions', i.allActions) + return [i.weights[features].sum() for features in featuresForEachAction] + except: + estr = f"Error: {traceback.format_exc()}" + print(estr) + print('featureToIndexMap', i.featureToIndexMap) + print('featureKeys', featureKeys) + print('sensoryState', i.sensoryState, 'allActions', i.allActions) + return [np.nan for x in range(len(i.allActions))] + + + + def plasticUpdate(i): + # This algorithm is a TD-lambda algorithm + # with epsilon-greedy action-selection + # (could use annealing of the epsilon - I removed it again) + + # determine predicted payoff + nextActionPredictedPayoff = 0.0 # used to find surprise between expected and received payoff + nextAction = 0 + # epsilon-greedy action-selection + # choose random + if np.random.random() < i.epsilon: # random + nextAction = np.random.choice(i.n_actions) + else: # choose best + try: + q_vals = i.predictPayoffsForAllActions() + nextAction = np.argmax(q_vals) + if i.reward >= 0.0: # goal achieved + nextActionPredictedPayoff = 0.0 + else: + nextActionPredictedPayoff = q_vals[nextAction] + except: + estr = f"Error: {traceback.format_exc()}" + print(estr) + print("q_vals", q_vals) + # only update weights if accumulated at least 1 experience + if not i.recentReset: + # determine the corrected payoff version given the reward actually received + previousActionCorrectedPayoff = i.reward + (nextActionPredictedPayoff * i.gamma) + # use this information to update weights for last action-selection based on how surprised we were + features = i.featureToIndexMap[tuple(i.previousSensoryState)+(i.action,)] + previousActionPredictedPayoff = i.weights[features].sum() + surprise = previousActionCorrectedPayoff - previousActionPredictedPayoff + # do weight updates + i.trace[features] = 1.0 + # do trace updates + i.weights += i.alpha * surprise * i.trace + i.trace *= i.lmbda + # keep track of state and action t, t-1 + i.previousSensoryState = i.sensoryState[:] + i.action = nextAction + i.recentReset = False + + + def staticUpdate(i): + # same as plasticUpdate, but without learning + # (a.k.a. 'deployment') + + # determine predicted payoff + nextActionPredictedPayoff = 0.0 # used to find surprise between expected and received payoff + nextAction = 0 + # greedy action-selection + q_vals = i.predictPayoffsForAllActions() + nextAction = np.argmax(q_vals) + # step the storage of state and action in memory + i.previousSensoryState = i.sensoryState[:] + i.action = nextAction + + +""" +This derived class adds a mutation_rate attribute, as well as methods for mutation, crossover, and fitness handling. You can then use an evolutionary algorithm to evolve a population of EvolvableAgent instances by applying selection, crossover, and mutation operations based on the agents' fitness values. 
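+
+A minimal sketch of a generational loop built directly on this class's API
+(the names evolve and evaluate_agent are illustrative only, and the truncation
+selection here is a simplification; the project's own evolution code in
+curio_evolve_weights.py instead evolves raw weight vectors with DEAP):
+
+    import random
+
+    def evolve(evaluate_agent, obsSpace=(2, 2), actSpace=(4,),
+               popsize=20, generations=10):
+        # evaluate_agent(agent) -> float is an environment-specific callable,
+        # e.g. a thin wrapper around ConvBelt.evaluate
+        pop = [EvolvableAgent(obsSpace=obsSpace, actSpace=actSpace)
+               for _ in range(popsize)]
+        for gen in range(generations):
+            for agent in pop:
+                agent.set_wfitness(evaluate_agent(agent))   # raw fitness
+            pop.sort(key=lambda a: a.get_wfitness(), reverse=True)
+            parents = pop[:popsize // 2]                    # truncation selection
+            children = []
+            while len(children) < popsize:
+                p1, p2 = random.sample(parents, 2)
+                child = p1.crossover(p2)                    # clone or recombine
+                child.mutate()                              # germline mutation
+                children.append(child)
+            pop = children
+        return pop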
+""" + +def tuple_shape(input_tuple): + if not isinstance(input_tuple, tuple): + try: + return input_tuple.shape + except: + raise TypeError("Input must be a tuple") + + # Check if the tuple is nested (i.e., if it's a multidimensional tuple) + if any(isinstance(item, tuple) for item in input_tuple): + shape = [] + while isinstance(input_tuple, tuple): + shape.append(len(input_tuple)) + input_tuple = input_tuple[0] + return tuple(shape) + else: + return (len(input_tuple),) + +class Holder(object): + def __init__(self): + pass + +class EvolvableAgent(Agent): + """ EvolvableAgent +This class extends the Agent class from q_agent.py, adding functionality for evolutionary computation. The EvolvableAgent class can be used with evolutionary algorithms to optimize the agent's performance through mutation, crossover, and selection based on fitness values. + +Usage: +import EvolvableAgent + +Class: +EvolvableAgent(obsSpace, actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.96, mutation_rate=0.05) + +Attributes (in addition to Agent attributes): +mutation_rate (float): The probability of each weight being mutated during mutation. +fitness (float): The fitness value of the agent, used for evaluation and selection in an evolutionary algorithm. + +Methods (in addition to Agent methods): +mutate(): +Mutates the agent's weights by adding small random values, drawn from a normal distribution. The mutation_rate attribute determines the probability of each weight being mutated. + +csharp +Copy code +crossover(other: 'EvolvableAgent') -> 'EvolvableAgent': + Performs uniform crossover between this agent and another agent, creating a new offspring agent. + Args: + other (EvolvableAgent): The other agent to perform crossover with. + Returns: + EvolvableAgent: The offspring agent resulting from the crossover. + +set_fitness(fitness: float): + Sets the fitness value for the agent. + Args: + fitness (float): The fitness value to be set. + +get_fitness() -> float: + Gets the fitness value of the agent. + Returns: + float: The fitness value of the agent. +Examples: +>>> from EvolvableAgent import EvolvableAgent +>>> obsSpace, actSpace = (2, 2), (3,) +>>> agent = EvolvableAgent(obsSpace=obsSpace, actSpace=actSpace, mutation_rate=0.05) +""" + def __init__(self, obsSpace, actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.96, \ + mutation_rate=0.05, crossover_rate=0.01, fitness=None): + # obsSpace, actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.96 + super().__init__(obsSpace, actSpace, alpha, gamma, epsilon, lmbda) + self.germline = self.weights + self.mutation_rate = mutation_rate + self.crossover_rate = crossover_rate + self.wfitness = None + self.fitness = fitness + self.init_fitness = fitness + + def mutate(self): + """ + Mutate the agent's weights by adding small random values, drawn from a normal distribution. + The mutation_rate attribute determines the probability of each weight being mutated. 
+ """ + wtshape = self.weights.shape + glshape = self.germline.shape + mutation_mask = np.random.random(self.germline.shape) < self.mutation_rate + self.germline[mutation_mask] += np.random.normal(loc=0, scale=0.01, size=np.sum(mutation_mask)) + self.weights = self.germline + assert glshape == self.germline.shape, "Error: mutate() germline shape has changed" + assert wtshape == self.weights.shape, "Error: mutate() weights shape has changed" + + def crossover(self, other: 'EvolvableAgent') -> 'EvolvableAgent': + """ + Perform uniform crossover between this agent and another agent, creating a new offspring agent. + Args: + other (EvolvableAgent): The other agent to perform crossover with. + Returns: + EvolvableAgent: The offspring agent resulting from the crossover. + """ + wtshape = self.weights.shape + glshape = self.germline.shape + offspring = EvolvableAgent(self.obsSpace, self.actSpace, self.alpha, self.gamma, self.epsilon, self.lmbda, self.mutation_rate, self.crossover_rate, self.init_fitness4) + if np.random.random() <= self.crossover_rate: + crossover_mask = np.random.randint(0, 2, size=self.germline.shape, dtype=bool) + offspring.germline = np.where(crossover_mask, self.germline, other.germline) + else: + offspring.germline = self.germline + offspring.weights = offspring.germline + assert self.obsSpace.shape == offspring.obsSpace.shape, f"Error: offspring has different obsSpace {offspring.obsSpace} != {self.obsSpace}" + assert self.actSpace.shape == offspring.actSpace.shape, f"Error: offspring has different actSpace {offspring.actSpace} != {self.actSpace}" + assert tuple_shape(self.ftrSpace) == tuple_shape(offspring.ftrSpace), f"Error: offspring had different ftrSpace {offspring.ftrSpace} {offspring.obsSpace} {offspring.actSpace} != {self.ftrSpace} {self.obsSpace} {self.actSpace}" + assert glshape == offspring.germline.shape, "Error: offspring germline shape has changed" + assert wtshape == offspring.weights.shape, "Error: offspring weights shape has changed" + return offspring + + def set_wfitness(self, fitness: float): + """ + Set the fitnevss value for the agent. + Args: + fitness (float): The fitness value to be set. + """ + self.wfitness = fitness + + def get_wfitness(self) -> float: + """ + Get the fitness value of the agent. + Returns: + float: The fitness value of the agent. + """ + return self.wfitness + + def set_fitness(self, fitness: float): + """ + Set the fitness value for the agent. + Args: + fitness (float): The fitness value to be set. + """ + self.fitness.values = (fitness,) + + def get_fitness(self) -> float: + """ + Get the fitness value of the agent. + Returns: + float: The fitness value of the agent. + """ + return self.fitness.values[0] + + +if __name__ == '__main__': + '''test important functions and workflows with doctesting + run this python file by itself to run these tests, and set + LOGGING=True near top of file.''' + import doctest + from functools import partial + #doctest.testmod() + test = partial(doctest.run_docstring_examples, globs=globals()) + test(Agent.predictPayoffsForAllActions) diff --git a/code/curio_evolve_weights.py b/code/curio_evolve_weights.py new file mode 100755 index 0000000..a4d9d41 --- /dev/null +++ b/code/curio_evolve_weights.py @@ -0,0 +1,341 @@ +""" +ew.py + +Evolve Weights + +Uses DEAP to evolve a set of weights with mutation and crossover. + +Integration with other code happens via programming by contract. 
+The 'environ' parameter must be an object that provides two +methods: + get_weights_len : returns a scalar integer indicating the 1D vector length for weights + evaluate : accepts a weight vector, returns a tuple object containing a single fitness value (e.g., (0.5,)) +and has an attribute related to reinforcement learning for agents: + alpha + +""" + + +import sys +# allow importing from the 'code/' dir +sys.path.append("../code") + +import os +import platform +import pickle +import json +import traceback +import datetime +import copy + +import numpy as np, itertools, copy +import matplotlib.pyplot as plt +from collections import defaultdict +import importlib # module reloading + +#import environments +#import agents + +# always forces a reload in case you have edited environments or agents +#importlib.reload(environments) +#importlib.reload(agents) +#from environments.gridworld import GridWorld +#import environments.puzzle as pz +#from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace + +#from agents.q_agent import EvolvableAgent as Agent + +# DEAP imports + +import random +from deap import creator, base, tools, algorithms + +import multiprocessing + +#pool = multiprocessing.Pool() +#toolbox.register("map", pool.map) + +# Weight handling +#from mda import MultiDimArray + +def isotime(): + return datetime.datetime.now().isoformat() + +def t2fn(timestamp): + timestamp = timestamp.replace('.','_') + timestamp = timestamp.replace(':','_') + return timestamp + +class Holder(object): + def __init__(self): + pass + +class EvolveWeights(object): + """ + Class to apply DEAP to evolve a population consisting of a set + of weights. + """ + def __init__(self, + # environ, # Instance of environ class + # What is needed from environ? + # weights_len (int) + # alpha (float) + # evaluate (method/function) + weights_len, + alpha=0.05, + evaluate=None, + popsize=100, + maxgenerations=10000, + cxpb=0.5, + mtpb=0.05, + wmin=-20.0, + wmax=20.0, + mut_center=0.0, + mut_sigma=0.1, + mut_indpb=0.05, + tournsize=5, + tournk=2, + normalize_fitness=True, + tag='environ' + ): + + self.tag = tag + self.starttime = isotime() + self.logbase = tag + "_" + t2fn(self.starttime) + + # Excluding environment as a parameter + # self.environ = environ + # Instead, we need to pass in weights_len, alpha, evaluate + self.weights_len = weights_len # environ.get_weights_len() + self.alpha = alpha + self.evaluate = evaluate + + self.popsize = popsize + self.maxgenerations = maxgenerations + self.cxpb = cxpb + self.mtpb = mtpb + self.wmin = wmin + self.wmax = wmax + self.mut_center = mut_center + self.mut_sigma = mut_sigma + self.mut_indpb = mut_indpb + self.tournsize = tournsize + self.tournk = tournk + self.normalize_fitness = normalize_fitness + pass + + def masv(self, pop): + mav = [] + maxs = [] + for ind in pop: + wts = [x for x in ind] + mav.append(np.mean(np.abs(wts))) + maxs.append(np.max(np.abs(wts))) + allmax = np.max(maxs) + mymasv = [x/allmax for x in mav] + return mymasv + + def cxTwoPointCopy(self, ind1, ind2): + """Execute a two points crossover with copy on the input individuals. The + copy is required because the slicing in numpy returns a view of the data, + which leads to a self overwriting in the swap operation. 
It prevents + :: + >>> import numpy as np + >>> a = np.array((1,2,3,4)) + >>> b = np.array((5,6,7,8)) + >>> a[1:3], b[1:3] = b[1:3], a[1:3] + >>> print(a) + [1 6 7 4] + >>> print(b) + [5 6 7 8] + """ + size = len(ind1) + cxpoint1 = random.randint(1, size) + cxpoint2 = random.randint(1, size - 1) + if cxpoint2 >= cxpoint1: + cxpoint2 += 1 + else: # Swap the two cx points + cxpoint1, cxpoint2 = cxpoint2, cxpoint1 + ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = ind2[cxpoint1:cxpoint2].copy(), ind1[cxpoint1:cxpoint2].copy() + return ind1, ind2 + + def zero(self): + return 0.0 + + def smallrandom(self, eps=None): + """ + Produce a small random number in [-eps .. eps]. + + A random variate in [-1 .. 1] is produced then + multiplied by eps, so the final range is in [-eps .. eps]. + + """ + if eps in [None]: + eps = self.alpha + rv = ((2.0 * random.random()) - 1.0) * eps + return rv + + def setup(self): + creator.create("FitnessMax", base.Fitness, weights=(1.0,)) + creator.create("Individual", np.ndarray, fitness=creator.FitnessMax) + + self.toolbox = base.Toolbox() + + self.pool = multiprocessing.Pool() + self.toolbox.register("map", self.pool.map) + + #toolbox.register("attr_bool", random.randint, 0, 1) # non-numpy non-float version + # self.toolbox.register("attr_float", random.random) + #self.toolbox.register("attr_float", self.zero) + self.toolbox.register("attr_float", self.smallrandom) + + self.toolbox.register("individual", tools.initRepeat, creator.Individual, self.toolbox.attr_float, n=self.weights_len) + self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual) + + # self.toolbox.register("evaluate", self.evaluate) + self.toolbox.register("evaluate", self.evaluate) + #toolbox.register("mate", tools.cxTwoPoint) # non-numpy non-float version + self.toolbox.register("mate", self.cxTwoPointCopy) + #toolbox.register("mutate", tools.mutFlipBit, indpb=0.05) # non-numpy non-float version + self.toolbox.register("mutate", tools.mutGaussian, mu=self.mut_center, sigma=self.mut_sigma, indpb=self.mut_indpb) + self.toolbox.register("select", tools.selTournament, tournsize=self.tournsize, k=self.tournk) + + def normalize_fitnesses(self, fitnesses): + #print("fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + maxfitness = np.max([x[0] for x in fitnesses]) + #print("maxfitness", maxfitness) + listfit = [x[0] for x in fitnesses] + #print("listfit", listfit) + normfit = [x/maxfitness for x in listfit] + #print("normfit", normfit) + fitnesses = [tuple([x]) for x in normfit] + #print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + return fitnesses + + def log_it(self, generation): + pool = self.pool + toolbox = self.toolbox + self.pool = None + self.toolbox = None + pklfn = f"{self.logbase}__{generation+1}-{self.maxgenerations}.pkl" + pickle.dump(self, open(pklfn, "wb")) + self.pool = pool + self.toolbox = toolbox + + def loop(self): + self.population = self.toolbox.population(n=self.popsize) + #print(self.masv(self.population)) + NGEN=self.maxgenerations + for gen in range(NGEN): + print("generation", gen) + offspring = algorithms.varAnd(self.population, self.toolbox, cxpb=self.cxpb, mutpb=self.mtpb) + # print("offspring", offspring) + # constrain genome values to [0,1] + for offspring_i,individual in enumerate(offspring): + np.clip(np.array(offspring[offspring_i]), self.wmin, self.wmax) + # print("clipped offspring", offspring) + # Evaluate the individuals with an invalid fitness (not yet evaluated) + # print("check fitness.valid") + invalid_ind = [ind for ind in 
offspring if not ind.fitness.valid] + # print("invalid_ind", len(invalid_ind)) + #print("setting fitness") + fitnesses = self.toolbox.map(self.toolbox.evaluate, invalid_ind) + if self.normalize_fitness: + fitnesses = self.normalize_fitnesses(fitnesses) + """ + #print("fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + maxfitness = np.max([x[0] for x in fitnesses]) + #print("maxfitness", maxfitness) + listfit = [x[0] for x in fitnesses] + #print("listfit", listfit) + normfit = [x/maxfitness for x in listfit] + #print("normfit", normfit) + fitnesses = [tuple([x]) for x in normfit] + #print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + """ + print("fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + self.fitness_dist(fitnesses) + # print("update ind fitness") + for ind, fit in zip(invalid_ind, fitnesses): + ind.fitness.values = fit + #print("selection") + #print("offspring\n", self.masv(offspring)) + self.offspring = offspring + self.population = self.toolbox.select(offspring, k=len(self.population)) + if 0 == gen % 100: + self.log_it(gen) + + #print("population after selection\n", self.masv(self.population)) + #print("Report for generation", gen) + self.report() + + def report(self): + # post-evolution analysis + fitnesses = self.toolbox.map(self.toolbox.evaluate, self.population) + if self.normalize_fitness: + fitnesses = self.normalize_fitnesses(fitnesses) + self.fitnesses = fitnesses + self.sortedFitnesses = sorted(fitnesses) + self.sortedFitnesses.reverse() + self.fitness_dist(fitnesses) + + self.bestFitness, self.worstFitness = self.sortedFitnesses[0], self.sortedFitnesses[-1] + print("best/worst w", self.bestFitness, self.worstFitness) + + self.bestGenome = tools.selBest(self.population, k=1) + # print(self.bestGenome) + + def ffmt(self, value, fmt="%3.2f"): + return fmt % value + + def fitness_dist(self, fitnesses): + listfit = [x[0] for x in fitnesses] + pct05, pct25, pct50, pct75, pct95 = np.percentile(listfit, [0.05, 0.25, 0.5, 0.75, 0.95]) + print(f"fitness dist: {self.ffmt(np.min(listfit))} {self.ffmt(pct05)} {self.ffmt(pct25)} {self.ffmt(pct50)} {self.ffmt(pct75)} {self.ffmt(pct95)} {self.ffmt(np.max(listfit))}") + + def driver(self): + # Initialize + self.setup() + # Generation loop + self.loop() + # Report + self.report() + self.log_it(self.maxgenerations) + print(self.masv(self.population)) + self.pool.close() + pass + +def normalized(a, axis=-1, order=2): + l2 = np.atleast_1d(np.linalg.norm(a, order, axis)) + l2[l2==0] = 1 + return a / np.expand_dims(l2, axis) + +def normalize(v): + if 0 == len(v): + return np.nan + return v/np.linalg.norm(v) + +class MinEnv(object): + def __init__(self, wt_len=12, alpha=0.01, w=0.5): + self.alpha = alpha + self.wt_len = wt_len + self.w = w + def get_weights_len(self): + return self.wt_len + def evaluate(self, wts): + mywts = np.array([float(x) for x in wts]) + # Max entropy + return np.std(normalize(mywts))/0.30, + +def test_ew(): + env1 = MinEnv() + + ew = EvolveWeights(env1, popsize=100, maxgenerations=10, tournsize=75, tournk=3, normalize_fitness=False) + ew.driver() + +if __name__ == "__main__": + print("ew.py start...") + + test_ew() + + print("ew.py done.") diff --git a/code/curio_exp1.py b/code/curio_exp1.py new file mode 100755 index 0000000..6e86580 --- /dev/null +++ b/code/curio_exp1.py @@ -0,0 +1,355 @@ +import sys +# allow importing from the 'code/' dir +sys.path.append("../code") + +import os +import platform +import pickle +import json +import traceback +import datetime +import copy + +import numpy as np # , 
itertools, copy +import matplotlib.pyplot as plt +from collections import defaultdict +import importlib # module reloading + +import environments +import agents + +# always forces a reload in case you have edited environments or agents +importlib.reload(environments) +importlib.reload(agents) +#from environments.gridworld import GridWorld +import environments.puzzle as pz +from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace + +from agents.q_agent import EvolvableAgent as Agent + +# DEAP imports + +import random +from deap import creator, base, tools, algorithms + +import multiprocessing + +#pool = multiprocessing.Pool() +#toolbox.register("map", pool.map) + +# Weight handling +from mda import MultiDimArray + +# RESS +from ress import RESS + +# EvolveWeights +# from ew import EvolveWeights +from curio_evolve_weights import EvolveWeights + +# Experiment +from curio_experiment import Experiment + +def isotime(): + return datetime.datetime.now().isoformat() + +def t2fn(timestamp): + timestamp = timestamp.replace('.','_') + timestamp = timestamp.replace(':','_') + return timestamp + +class Holder(object): + def __init__(self): + pass + +if (1): + unambiguous_puzzle_spec = { + "puzzle_set_description": "Unambiguous puzzle set with 1 good, 1 bad puzzle", + "puzzles": [ + { + "puzzle_description": "Appetitive puzzle", + "tt": [[0,0,2], # state 0: first presentation + [0,0,0], # state 1: getting passed over (placeholder) + [2,2,2]], # state 2: consumed (saturating) + "features": [[2], # state 0: Green + [2], # state 1: Green (placeholder) + [0]], # state 2: Empty/Unknown (after being eaten) + "rewards": [-1, # state 0: first look + -1, # state 1: proceeding to next puzzle (placeholder) + 1], # state 2: consume (reward) + }, + { + "puzzle_description": "Aversive puzzle", + "tt": [[0,0,2], # state 0: first presentation + [0,0,0], # state 1: getting passed over (placeholder) + [2,2,2]], # state 2: consumed (saturating)], + "features": [[1], # state 0: Red + [1], # state 1: Red (placeholder) + [0]], # state 2: Empty/Unknown (after being eaten) + "rewards": [-1, # state 0: first look + -1, # state 1: proceeding to next puzzle (placeholder) + -2], # state 2: consume (punishment) + }, + ] + } + + ambiguous_puzzle_spec = { + "puzzle_set_description": "Ambiguous puzzle set with 1 good, 1 bad puzzle.", + "puzzles": [ + { + "puzzle_description": "Appetitive puzzle", + "tt": [[0,0,2], # state 0: first presentation + [0,0,0], # state 1: getting passed over (placeholder) + [2,2,2]], # state 2: consumed (saturating) + "features": [[1], # state 0: Red + [1], # state 1: Red (placeholder) + [0]], # state 2: Empty/Unknown (after being eaten) + "rewards": [-1, # state 0: first look + -1, # state 1: proceeding to next puzzle (placeholder) + 1], # state 2: consume (reward) + }, + { + "puzzle_description": "Aversive puzzle", + "tt": [[0,0,2], # state 0: first presentation + [0,0,0], # state 1: getting passed over (placeholder) + [2,2,2]], # state 2: consumed (saturating)], + "features": [[1], # state 0: Red + [1], # state 1: Red (placeholder) + [0]], # state 2: Empty/Unknown (after being eaten) + "rewards": [-1, # state 0: first look + -1, # state 1: proceeding to next puzzle (placeholder) + -2], # state 2: consume (punishment) + }, + ] + } + + specdict = { + 'unambiguous_puzzle_spec': unambiguous_puzzle_spec, + 'ambiguous_puzzle_spec': ambiguous_puzzle_spec, + } + + + exp_schedule = { + "setlist": [ + { + "desc": "Initial puzzle set", + "specs": ['unambiguous_puzzle_spec'], + "turns": 
50, # How many turns for 'lifetime learning' + # Needs to be passed to the agent + "num_stimuli": 6, # How many puzzles? Or how many different features? + # Might just be number of 'features' in puzzle spec + # We do not need to manually specify puzzle feature number + "sequence_type": "fixed", # + "probs": [[1.0], [1.0]] # + }, + { + "desc": "Stochastic puzzle sets", + "specs": ['unambiguous_puzzle_spec', 'ambiguous_puzzle_spec'], + "turns": 200, + "num_stimuli": 6, + "sequence_type": "stochastic", + "probs": [[1.0, 0.0], [0.0, 1.0]] + }, + ] + } + + +def make_puzzle_list(*args, **kwargs): + """ + """ + # Sanity checks + req_params = ['specdict', 'schedule'] + + paramsvalid = True + + for rpi in req_params: + if not rpi in kwargs: + paramsvalid = False + print("make_puzzle_list missing", rpi) + assert paramsvalid, f"Error: Missing a required parameter. Quitting." + + specdict = kwargs['specdict'] + schedule = kwargs['schedule'] + + puzzles = [] + + upress = RESS() # Random Equal Stimulus Sets instance + + for seti in schedule['setlist']: + num_sets = len(seti['specs']) + num_stimuli = seti['num_stimuli'] + num_turns = seti['turns'] + seqtype = seti['sequence_type'] + probs = seti['probs'] + + if 1 == num_sets: + # Simple, just repeat the puzzle num_stimuli * times + + pass + else: + + pass + + + + +def exp1_environment(*args, **kwargs): + + unambiguous_puzzle_spec = { + "puzzle_set_description": "Unambiguous puzzle set with 1 good, 1 bad puzzle", + "puzzles": [ + { + "puzzle_description": "Appetitive puzzle", + "tt": [[0,0,2], # state 0: first presentation + [0,0,0], # state 1: getting passed over (placeholder) + [2,2,2]], # state 2: consumed (saturating) + "features": [[2], # state 0: Green + [2], # state 1: Green (placeholder) + [0]], # state 2: Empty/Unknown (after being eaten) + "rewards": [-1, # state 0: first look + -1, # state 1: proceeding to next puzzle (placeholder) + 1], # state 2: consume (reward) + }, + { + "puzzle_description": "Aversive puzzle", + "tt": [[0,0,2], # state 0: first presentation + [0,0,0], # state 1: getting passed over (placeholder) + [2,2,2]], # state 2: consumed (saturating)], + "features": [[1], # state 0: Red + [1], # state 1: Red (placeholder) + [0]], # state 2: Empty/Unknown (after being eaten) + "rewards": [-1, # state 0: first look + -1, # state 1: proceeding to next puzzle (placeholder) + -2], # state 2: consume (punishment) + }, + ] + } + + ambiguous_puzzle_spec = { + "puzzle_set_description": "Ambiguous puzzle set with 1 good, 1 bad puzzle.", + "puzzles": [ + { + "puzzle_description": "Appetitive puzzle", + "tt": [[0,0,2], # state 0: first presentation + [0,0,0], # state 1: getting passed over (placeholder) + [2,2,2]], # state 2: consumed (saturating) + "features": [[1], # state 0: Red + [1], # state 1: Red (placeholder) + [0]], # state 2: Empty/Unknown (after being eaten) + "rewards": [-1, # state 0: first look + -1, # state 1: proceeding to next puzzle (placeholder) + 1], # state 2: consume (reward) + }, + { + "puzzle_description": "Aversive puzzle", + "tt": [[0,0,2], # state 0: first presentation + [0,0,0], # state 1: getting passed over (placeholder) + [2,2,2]], # state 2: consumed (saturating)], + "features": [[1], # state 0: Red + [1], # state 1: Red (placeholder) + [0]], # state 2: Empty/Unknown (after being eaten) + "rewards": [-1, # state 0: first look + -1, # state 1: proceeding to next puzzle (placeholder) + -2], # state 2: consume (punishment) + }, + ] + } + # Notion: Have an object to define a schedule of presentation of + # 
environments, with the ability to stochastically present one of + # a list of environments. + exp_schedule = { + "setlist": [ + { + "desc": "Initial puzzle set", + "specs": ['unambiguous_puzzle_spec'], + "turns": 50, + "num_stimuli": 6, + "sequence_type": "fixed", + "probs": [[1.0], [1.0]] + }, + { + "desc": "Stochastic puzzle sets", + "specs": ['unambiguous_puzzle_spec', 'ambiguous_puzzle_spec'], + "turns": 200, + "num_stimuli": 6, + "sequence_type": "stochastic", + "probs": [[1.0, 0.0], [0.0, 1.0]] + }, + ] + } + + if 'num_puzzles_on_belt' in kwargs: + num_puzzles_on_belt = kwargs['num_puzzles_on_belt'] + else: + num_puzzles_on_belt = 6 + + pz = unambiguous_puzzle_spec + if (1): + maxrewards = [1] + # Produce Gellermann sequence + upress = RESS() + print(dir(upress)) + print(pz['puzzles']) + print(len(pz['puzzles'])) + upseries = upress.newress(num_puzzles_on_belt, len(pz['puzzles'])) + print("upseries", upseries) + # Create puzzle sequence + # call to make_puzzle_list goes about here + + # Instantiate puzzles per Gellermann sequence + puzzles = [] + for stimi in upseries: + + stimn = int(stimi) + myp = Puzzle(tt=np.array(pz['puzzles'][stimn]['tt']), + features=pz['puzzles'][stimn]['features'], + rewards=pz['puzzles'][stimn]['rewards'] + ) + puzzles.append(myp) + # Create conveyor belt + world = ConvBelt(actionSpace = getActionSpace(puzzles), + observationSpace = getObservationSpace(puzzles), + maxRewards = maxrewards, + agentclass=Agent, + randomize = False, alpha=0.005) + # Add puzzles + for pi in puzzles: + world.append(pi) + + return world + + + +def do_experiment(): + # Experiment instance + print('creating myexp') + myexp = Experiment() + print('setting agentclass') + myexp.set_agentclass(Agent) + print('setting environclass') + myexp.set_environclass(ConvBelt) + print('setting evolverclass') + myexp.set_evolverclass(EvolveWeights) + print('setting evolver_attributes') + myexp.set_evolver_attributes() # defaults + print('setting environ_maker') + myexp.set_environ_maker(exp1_environment) # sets function + print('making environment') + myexp.make_environ() # Calls function + print('making evolver_instance') + myexp.make_evolver_instance() + if myexp.validate(): + print('running driver') + myexp.evolver.driver() + else: + print("Experiment failed to validate.") + + + +if __name__ == "__main__": + + print("exp1.py start...") + + do_experiment() + + print("exp1.py done.") diff --git a/code/curio_experiment.py b/code/curio_experiment.py new file mode 100755 index 0000000..4eb04af --- /dev/null +++ b/code/curio_experiment.py @@ -0,0 +1,192 @@ +""" +experiment.py + +Curiosity project Experiment class definition. + +Aim for better encapsulation. + +Experiment class + - This class should get the various classes to use in running an experiment + - EvolveWeights + - mda? 
+ - Environ (GridWorld, ConvBelt, Puzzle) + - Still is going to require ad hoc function to create the particular Environ + - But could pass in function to use + - Agentclass + - And experimental attributes + - For example + - Experiment constructs EW instance, passes in weight length + - Experiment constructs Environ instance + - Experiment requests evolution run of EW with parameters + - EW calls Experiment for each evaluation of an individual (and in what generation) + - Experiment calls Environ.evaluate with individual weights, agentclass + - Passes w, tuple back to EW + +""" + +import sys +import os +import traceback + +class Holder(object): + def __init__(self): + pass + +class Experiment(object): + """ + Experiment class. Instances will drive reinforcement learning experiments. + + + """ + + def __init__(self): + self.agentclass = None + self.environclass = None + self.evolverclass = None + self.environmaker = None + pass + + def validate(self): + valid = True + # Test that we have classes to use + valid = valid and (not self.agentclass in [None]) + valid = valid and (not self.environclass in [None]) + valid = valid and (not self.evolverclass in [None]) + # Test other values here + return valid + + def set_schedule(self, schedule): + self.schedule = schedule + + def set_environ_maker(self, environmaker): + self.environmaker = environmaker + + def make_environ(self): + if not self.environmaker in [None]: + try: + self.environ = self.environmaker() + except: + estr = f"Error: traceback.format_exc()" + print(estr) + self.environ = None + assert 0, "Creating environment failed. Quitting." + + def set_agentclass(self, agentclass): + # Test class for compatibility + okclass = True + # No test yet + + if okclass: + self.agentclass = agentclass + + def get_agentclass(self): + return self.agentclass + + def set_environclass(self, environclass): + # Test class for compatibility + okclass = True + + if not 'evaluate' in dir(environclass): + okclass = False + print("set_environclass error: class does not provide 'evaluate'") + + if okclass: + self.environclass = environclass + + def get_environclass(self): + return self.environclass + + def set_evolverclass(self, evolverclass): + # Test class for compatibility + okclass = True + + if not 'driver' in dir(evolverclass): + okclass = False + print("set_evolverclass error: class does not provide 'driver'") + + if okclass: + self.evolverclass = evolverclass + + def set_agent_attributes(self, alpha=0.005): + self.agent_props = Holder() + self.agent_props.alpha = 0.005 + + def set_evolver_attributes(self, + popsize=100, + maxgenerations=10000, + cxpb=0.5, + mtpb=0.05, + wmin=-20.0, + wmax=20.0, + mut_center=0.0, + mut_sigma=0.1, + mut_indpb=0.05, + tournsize=5, + tournk=2, + normalize_fitness=True, + tag='environ' + ): + self.evolver_props = Holder() + self.evolver_props.popsize = popsize + self.evolver_props.maxgenerations = maxgenerations + self.evolver_props.cxpb = cxpb + self.evolver_props.mtpb = mtpb + self.evolver_props.wmin = wmin + self.evolver_props.wmax = wmax + self.evolver_props.mut_center = mut_center + self.evolver_props.mut_sigma = mut_sigma + self.evolver_props.mut_indpb = mut_indpb + self.evolver_props.tournsize = tournsize + self.evolver_props.tournk = tournk + self.evolver_props.normalize_fitness = normalize_fitness + self.evolver_props.tag = tag + + def make_evolver_instance(self): + self.evolver = self.evolverclass( + # self.environclass, + # weights_len + weights_len=self.environ.get_weights_len(), + # alpha + 
alpha=self.environ.alpha, + # evaluate function + evaluate=self.environ.evaluate, + popsize=self.evolver_props.popsize, + maxgenerations=self.evolver_props.maxgenerations, + cxpb=self.evolver_props.cxpb, + mtpb=self.evolver_props.mtpb, + wmin=self.evolver_props.wmin, + wmax=self.evolver_props.wmax, + mut_center= self.evolver_props.mut_center, + mut_sigma= self.evolver_props.mut_sigma, + mut_indpb= self.evolver_props.mut_indpb, + tournsize= self.evolver_props.tournsize, + tournk= self.evolver_props.tournk, + normalize_fitness= self.evolver_props.normalize_fitness, + tag= self.evolver_props.tag + ) + + def set_env_attributes(self): + self.env_props = Holder() + + def handle_evaluation(self, ind, generation): + """ + evolver calls this to get an evaluation of an + individual. + + Depending on the experiment schedule and generation, + this may require constructing a new environment. + """ + pass + + def run_experiment(self): + """ + # Run experiment + ew = EvolveWeights(world, + popsize=100, + maxgenerations=1000, + tournsize=75, + tournk=3, + normalize_fitness=False) + ew.driver() + """ + diff --git a/code/environments/gridworld.py b/code/environments/gridworld.py new file mode 100644 index 0000000..a08b3a7 --- /dev/null +++ b/code/environments/gridworld.py @@ -0,0 +1,93 @@ +# custom version of openAI's gridworld +# to support arbitrary holes + +from typing import Tuple, List, Any + +class GridWorld: + def __init__(self,dims,startState=[0,0]): + self.height = dims[0] + self.width = dims[1] + self.startState = startState + self.state = self.startState[:] + self.holes = [] + self.goals = [] + def reset(self): + '''returns an initial observation while also resetting the environment''' + self.state = self.startState[:] + return self.state + def step(self,action) -> Tuple[Tuple[int], float, bool, Any]: + delta = [0,0] + if (action == 0): delta[0] = -1 + elif (action == 2): delta[0] = 1 + elif (action == 1): delta[1] = 1 + else: delta[1] = -1 + newstate = [self.state[0]+delta[0], self.state[1]+delta[1]] + newstate[0] = min(max(0,newstate[0]),self.height-1) + newstate[1] = min(max(0,newstate[1]),self.width-1) + self.state = newstate + # set default returns + reward = -1.0 + goalFound = False + # check for goal + if self.state in self.goals: + goalFound = True + reward = 0.0 + elif self.state in self.holes: + reward = -10.0 + # openAIgym format: (state, reward, goalAchieved, DebugVisInfo) + return (self.state, reward, goalFound, None) + + def render(env,brain): + # renders a gridworld environment + # and plots the agent's path + import numpy as np + import matplotlib.pyplot as plt + path = [] + brain.reset() # Warning!!: NOT MABE-reset(), but soft-reset() (keep weights) + nextState = env.reset() + dims = [env.height, env.width, 4] + path.append(nextState) + time = 0 + while True: + time += 1 + brain.sensoryState = nextState # SET INPUTS + brain.plasticUpdate() + nextState, reward, goal_achieved, _ = env.step(brain.action) # GET OUTPUTS + path.append(nextState) + if goal_achieved or time == 100: break + brain.reward = reward + y,x = zip(*path) + x,y = (np.array(x)+0.5, np.array(y)+0.5) + # setup figure + plt.figure(figsize=(dims[1],dims[0])) + # plot landmarks + hasGoals = False + goals = [] + hasHoles = False + holes = [] + try: goals = env.goals + except AttributeError: pass + else: hasGoals = True + try: holes = env.holes + except AttributeError: pass + else: hasHoles = True + if hasGoals: + for goal in goals: + newrec = plt.Rectangle((goal[1], goal[0]), 1, 1, color='green', edgecolor=None, 
linewidth=2.5, alpha=0.7) + plt.gca().add_patch(newrec) + if hasHoles: + for hole in holes: + newrec = plt.Rectangle((hole[1], hole[0]), 1, 1, color='orange', edgecolor=None, linewidth=2.5, alpha=0.7) + plt.gca().add_patch(newrec) + plt.plot(x,y,color='gray') + plt.scatter(x[0],y[0],s=64,color='green') + plt.scatter(x[-1],y[-1],s=64,color='red') + plt.grid(linestyle='--') + plt.ylim([0,dims[0]]) + plt.xlim([0,dims[1]]) + plt.gca().set_yticks(list(range(dims[0]))) + plt.gca().set_xticks(list(range(dims[1]))) + plt.gca().invert_yaxis() + # print out location history + print(' '.join([str(x)+','+str(y) for x,y in path])) + diff --git a/code/environments/puzzle.py b/code/environments/puzzle.py new file mode 100644 index 0000000..515371c --- /dev/null +++ b/code/environments/puzzle.py @@ -0,0 +1,494 @@ +""" +puzzle.py + + +""" + +import numpy as np, itertools +from random import shuffle +from typing import List, Tuple, Union, Any +import copy +#import gym, gym_gridworlds # if using other environments + + +# overridden in agent.py, typically due to load order +LOGGING = True + +import logging, sys +logging.basicConfig(stream=sys.stdout,level=logging.INFO) +log = logging.getLogger() + +if not LOGGING: + # remove all logging functionality + for handler in log.handlers.copy(): + try: + log.removeHandler(handler) + except ValueError: # in case another thread has already removed it + pass + log.addHandler(logging.NullHandler()) + log.propagate = False + +class Puzzle: + + __slots__ = [ + 'tt', + 'features', + 'rewards', + 'state', + 'initialState', + 'solved', + 'solvable', + 'maxrewards', + 'originalrewards'] + + + def __init__(self, tt:List[List[int]], features:List[int], rewards:List[float], initialState:int = 0): + self.tt = tt + self.features = features + self.rewards = rewards[:] + self.originalrewards = rewards + self.state = 0 + self.initialState = initialState + self.solved = False + + + def __str__(self) -> str: + output = "" + output += "transition table:\n" + for row in self.tt: + output += f" {str(row)}\n" + output += f"solved: {self.solved}\n" + output += f"state: {self.state}\n" + output += f"features: {self.features}\n" + output += f"rewards: {self.rewards}\n" + return output + + + def reset(self): + '''must be called before first use''' + self.solved = False + self.state = self.initialState + self.rewards = self.originalrewards[:] + + + def setMaxRewards(self, maxRewards): + '''typically used by the ConvBelt class before reset()''' + self.maxrewards = set(self.rewards) & set(maxRewards) + self.solvable = bool(self.maxrewards) + + + def transition(self,action:int) -> Tuple[float, List[int], bool]: + self.state = self.tt[self.state][action] + finished = False + reward = self.rewards[self.state] + if self.rewards[self.state] in self.maxrewards: + self.rewards[self.state] = -1 # 'eat' the food and replace with empty reward + finished = True + self.solved = True + return (reward, self.features[self.state], finished) + + def getFeatures(self) -> List[int]: + '''returns only the current observable features of the puzzle''' + return self.features[self.state] + + +def Action(index:Union[int,str]) -> Union[str,int]: + ''' action str <-> int Action('pass')->1 Action(1)->'pass' ''' + if isinstance(index, (int,np.int64)): + return ('idle','pass','investigate','eat')[index] + return {'idle':0,'pass':1,'investigate':2,'eat':3}[index] + + +class ConvBelt: + """ + __slots__ = [ + 'puzzles', # (list[Puzzle]) - list of puzzles, use append() + 'pi', # (int) - currently selected puzzle / "puzzle index" + 
'puzzle', # (ref:Puzzle) - shortcut for self.puzzles[pi] + 'randomize', # (bool) - shuffling of puzzles between trials + 'maxrewards', # (list[float]) - the maximum achievable rewards + 'action_space', # (tuple[int]) - number of actions available to agents, usually (4,) + 'observation_space', # (tuple[int]) - features/dimensions given to agents (dim1 size, dim2 size...) + 'puzzlesLeftToComplete', # (int) - faster tracking of how many are left, when 0 set self.solved + 'solved', # (bool) - state flag for all puzzles solved (trial can be over) + 'agentclass', + 'killed_reward', + 'max_training_trials', + 'max_steps', + 'alpha', + 'gamma', + 'epsilon', + 'lmbda', + #'get_weights_len', + #'reset', + #'extend', + #'clear', + ] + """ + + def __init__(self,actionSpace,observationSpace,maxRewards, agentclass, + killed_reward=-10.0, max_training_trials=50, max_steps=32, + alpha=0.01, gamma=0.95, epsilon=0.01, lmbda=0.42, randomize=False): + '''please provide entire actionSpace, observationSpace, maxRewards for all puzzles + even those later added this environment''' + self.puzzles = [] + self.pi = 0 + self.puzzle = None + self.randomize = randomize + self.action_space = actionSpace + self.observation_space = observationSpace + self.maxrewards = maxRewards + self.puzzlesLeftToComplete = 0 + self.solved = False + + self.agentclass = agentclass + self.killed_reward = killed_reward + self.max_training_trials = max_training_trials + self.max_steps = max_steps + self.alpha = alpha + self.gamma = gamma + self.epsilon = epsilon + self.lmbda = lmbda + + print(self.get_weights_len()) + + def get_weights_len(self): + """ + Return the length of weights needed for an agent. + """ + print("in ConvBelt.get_weights_len") + mywl = np.prod(tuple(self.observation_space) + tuple(self.action_space)) + return mywl + + def reset(self): + '''returns an initial observation while also resetting the environment''' + log.info("resetting all puzzles") + self.puzzlesLeftToComplete = 0 + for puzzle in self.puzzles: + puzzle.reset() + if puzzle.solvable: + self.puzzlesLeftToComplete += 1 + self.solved = not bool(self.puzzlesLeftToComplete) + if self.randomize: shuffle(self.puzzles) + self.pi = 0 + if len(self.puzzles) == 0: + raise Exception("Please add puzzles to the belt/env first using append() or extend()") + self.puzzle = self.puzzles[self.pi] + return self.puzzle.getFeatures() + + def append(self, newPuzzle:Puzzle): + log.info("adding new puzzle") + newPuzzle.setMaxRewards(self.maxrewards) + newPuzzle.reset() + if newPuzzle.solvable: + self.puzzlesLeftToComplete += 1 + self.solved = False + self.puzzles.append(newPuzzle) + if self.puzzle is None: + self.reset() + + def extend(self, newPuzzles:List[Puzzle]): + log.info(f"adding {len(newPuzzles)} new puzzles") + oldLength = len(self.puzzles) + self.puzzles.extend(newPuzzles) + newLength = len(self.puzzles) + for puzzle_i in range(oldLength, newLength): + puzzle = self.puzzles[puzzle_i] + puzzle.setMaxRewards(self.maxRewards) + puzzle.reset() + if puzzle.solvable: + self.puzzlesLeftToComplete += 1 + self.solved = False + if self.puzzle is None: + self.reset() + + def _post_removal(self): + if len(self.puzzles) == 0: + self.puzzle = None + log.info("puzzles list now empty") + if self.pi >= len(self.puzzles)-1: + self.pi = 0 + log.info("resetting index to 0") + + def clear(self): + '''clears the belt of puzzles''' + self.puzzles.clear() + log.info("removed ALL puzzles") + self.puzzlesLeftToComplete = 0 + self._post_removal() + + def remove(self, puzzle): + '''removes puzzle from 
belt of puzzles''' + if puzzle.solvable: + self.puzzlesLeftToComplete -= 1 + self.puzzles.remove(puzzle) + log.info("removed puzzle") + self._post_removal() + + def pop(self, index=None): + '''removes puzzle at index or from end''' + if index is None: + index = -1 + puzzle = self.puzzles.pop(index) + if puzzle.solvable: + self.puzzlesLeftToComplete -= 1 + log.info(f"popped puzzle at index {index}") + self._post_removal() + + def _completed_a_puzzle(self): + self.puzzlesLeftToComplete -= 1 + log.info(f"completed a puzzle - {self.puzzlesLeftToComplete} solvable puzzles remain") + if self.puzzlesLeftToComplete == 0: + self.solved = True + log.info(f"all puzzles completed - trial complete") + + def step(self, action:int) -> Tuple[List[int], float, bool, Any]: # returns (state,reward,goal,_) (gym format) + if action == 1: # pass (change to next puzzle, and change no puzzle's state) + self.pi = (self.pi + 1) % len(self.puzzles) + # reports states of old and new puzzles instead of a transition + log.info(f"(puzzle-step) action {action} ({Action(action)}) from old puzzle state {self.puzzle.state} to new puzzle state {self.puzzles[self.pi].state}") + self.puzzle = self.puzzles[self.pi] + return (self.puzzle.features[self.puzzle.state], # features + -1, # reward of a pass + #self.puzzle.rewards[self.puzzle.state], # reward + self.solved, # done-flag + None) # DebugVisInfo + else: + log.info(f"(puzzle-step) action {action} ({Action(action)}) from state {self.puzzle.state} to {self.puzzle.tt[self.puzzle.state][action]}") + reward, features, puzzle_just_finished = self.puzzle.transition(action) + if puzzle_just_finished: + self._completed_a_puzzle() + return (features, reward, self.solved, None) + + def render(self, env, brain): + # renders a puzzlebox environment + import numpy as np + import matplotlib.pyplot as plt + actions = [] + rewards = [] + states = [] + brain.reset() # Warning!!: NOT MABE-reset(), but soft-reset() (keep weights) + nextState = env.reset() + states.append(nextState) + actions.append(0) # path is recording actions in this visualization + rewards.append(-1) + time = 0 + print(env.puzzlesLeftToComplete) + while True: + time += 1 + brain.sensoryState = nextState # SET INPUTS + brain.plasticUpdate() + nextState, reward, goal_achieved, _ = env.step(brain.action) # GET OUTPUTS + actions.append(brain.action) + rewards.append(reward) + states.append(nextState) + if env.puzzlesLeftToComplete == 0 or time == 600: break + #if goal_achieved or time == 100: break + brain.reward = reward + print(actions) + print(states) + plt.figure() + plt.plot(actions) + plt.scatter(list(range(len(actions))),actions) + plt.figure() + plt.plot(rewards) + plt.scatter(list(range(len(rewards))),rewards) + + def evaluate(self, ind, + num_trials=200, + n_actions=4, + HARD_TIME_LIMIT=600): + """ + Given an individual agent's weights, evaluate it and + return its fitness. + """ + w = 0.0 + + # Need to refactor the following code taken from the + # Jupyter notebook. 
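+
+        # Evaluation protocol (summary of the code below):
+        #   1. build a fresh agent from self.agentclass using this belt's
+        #      alpha/gamma/epsilon/lmbda and copy the individual's weights in,
+        #   2. run max_training_trials trials; each trial soft-resets the agent
+        #      and the belt, then steps until every solvable puzzle is done or
+        #      max_steps is reached,
+        #   3. fitness = sum over trials of the reward received on the final
+        #      step of each trial, returned as a 1-tuple for DEAP.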
+ + # domain-specific settings + #num_trials=200 + #n_actions = 4 + #(optimal lmbda in the agent is domain dependent - could be evolved) + #HARD_TIME_LIMIT = 600 + #KILLED_REWARD = -10 # not used here + #(standard reward) = -1.0 (means agent is potentially wasting time - set internal to agent code) + #(goal reward) = 1.0 (means the agent achieved something good - set internal to agent code) + + # alpha # how much to weigh reward surprises that deviate from expectation + # gamma # how important exepcted rewards will be + # epsilon # fraction of exploration to exploitation (how often to choose a random action) + # lmbda # how slowly memory of preceeding actions fades away (1=never, 0= + + agent = self.agentclass(obsSpace=self.observation_space, actSpace=self.action_space, alpha=self.alpha, + gamma=self.gamma, epsilon=self.epsilon, lmbda=self.lmbda) + + + # Put weights in the Agent + agent.weights = [x for x in ind] + + time_to_solve_each_trial = [] + rewards = [] + + for trialN in range(self.max_training_trials): + # some output to see it running + if (trialN % 10) == 0: print('.',end='') + # initialize the agent, environment, and time for this trial + agent.reset() # soft-reset() (keeps learned weights) + nextState = self.reset() + time = 0 + while True: + time += 1 + # set agent senses based on environment and allow agent to determine an action + agent.sensoryState = nextState + agent.plasticUpdate() + # determine effect on environment state & any reward (in standard openAI-gym API format) + nextState, reward, goal_achieved, _ = self.step(agent.action) + agent.reward = reward + if self.puzzlesLeftToComplete == 0 or time == self.max_steps: + agent.plasticUpdate() + break + # could have deadly rewards that stop the trial early + #elif reward <= -10: + # agent.sensoryState = nextState + # agent.reward = reward + # agent.plasticUpdate() + # agent.reset() + # nextState = self.reset() + rewards.append(reward) + time_to_solve_each_trial.append(time) + + # Calculate fitness + # Rewards are in [-1 .. 1], have to rescale to [0 .. 1] + #scalerewards = (np.array(rewards) * 0.5) + 0.5 + #w = np.mean(scalerewards) + w = sum(rewards) + + return w, + + +def getObservationSpace(*items) -> Tuple[int]: + '''Returns total features dimensions over all puzzles, starting from 0. + Given 1 or more puzzles, finds union of observation space (features). + then returns the size of that space. + Ensures all puzzles have same feature dimensions, errors if not. + Useful when setting up a RL state space for certain feature sizes. + [3,1] would have dimensions [4,2], and [[0,2],[0,1]] would be [1,3] + + >>> p1 = Puzzle(tt=[[]], rewards=[], features=[[0,1],[0,1],[3,1]]) + >>> getObservationSpace(p1) + (4, 2) + >>> p2 = Puzzle(tt=[[]], rewards=[], features=[[1,1],[1,1],[2,4]]) + >>> getObservationSpace(p2) + (3, 5) + >>> getObservationSpace(p1,p2) + (4, 5) + >>> puzzles = [p1,p2] + >>> getObservationSpace(puzzles) + (4, 5) + ''' + if type(items) is tuple and isinstance(items[0], Puzzle): + # perform union (max) over feature space of all items + highest = copy.copy(items[0].features[0]) # features is [[int,int,...],...] 
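+        # The loop below raises 'highest' to the element-wise maximum feature
+        # value seen in any state of any puzzle; each observation-space
+        # dimension is then that maximum + 1 (features are 0-indexed).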
+ featurelen = len(highest) + for puzzle in items: + for featureset in puzzle.features: + if len(featureset) != featurelen: + raise Exception("not all features have the same length") + for feature_i in range(len(featureset)): + highest[feature_i] = max(highest[feature_i],featureset[feature_i]) + return tuple((e+1 for e in highest)) # size is 1+highest due to 0-indexing of features + elif type(items) is tuple and type(items[0]) in (tuple,list): + return getObservationSpace(*items[0]) # unpack one layer + else: + raise Exception(f"Expected type of Puzzle(s), but got {type(items)}") + + +def getActionSpace(*items) -> Tuple[int]: + '''Returns total action dimensions over all puzzles, (num columns in tt). + Given 1 or more puzzles. + Ensures all puzzles have same dimensions, errors if not. + Useful when setting up a RL state space for certain action sizes. + + >>> p1 = Puzzle(tt=[[0,0],[4,2]], rewards=[], features=[[]]) + >>> getActionSpace(p1) + (2,) + >>> p2 = Puzzle(tt=[[0,0,1],[1,1,2]], rewards=[], features=[[]]) + >>> getActionSpace(p2) + (3,) + >>> getActionSpace(p1,p2) + Traceback (most recent call last): + ... + Exception: not all puzzles (rows) have the same tt col size + ''' + + if type(items) is tuple and isinstance(items[0], Puzzle): + # perform union (max) over feature space of all items + nrows, ncols = len(items[0].tt), len(items[0].tt[0]) + for puzzle in items: + prows = len(puzzle.tt) + if prows != nrows: + raise Exception("not all puzzles have the same tt row size") + samerows = [len(c) == ncols for c in puzzle.tt] + if not all(samerows): + raise Exception("not all puzzles (rows) have the same tt col size") + return (ncols,) + elif type(items) is tuple and type(items[0]) in (tuple,list): + return getActionSpace(*items[0]) # unpack one layer + else: + raise Exception(f"Expected type of Puzzle(s), but got {type(items)}") + + +def _test_world(): + '''full test of the conveyorbelt world + + >>> import copy + >>> maxrewards = [1] + >>> easy_features = [[0,1],[0,1],[3,1],[0,0]] + >>> easy_rewards = [-1,-1,-1,1] + >>> easy_tt = np.array([[0,0,2,3], [0,0,0,0], [2,0,2,3], [3,3,3,3]]) + >>> p1 = Puzzle(tt=easy_tt, features=easy_features, rewards=easy_rewards) + >>> p2 = copy.deepcopy(p1) + >>> puzzles = (p1,p2) + >>> world = ConvBelt(actionSpace = getActionSpace(puzzles), observationSpace = getObservationSpace(puzzles), maxRewards = maxrewards, randomize = False) + >>> world.append(p1) + >>> world.append(p2) + >>> # trial 1 + >>> world.reset() # reset before first use just to be sure + >>> world.step(Action('investigate')) + (-1, [3, 1], False) + >>> world.step(Action('pass')) + (-1, [0, 1], False) + >>> world.step(Action('eat')) + (1, [0, 0], False) + >>> world.step(Action('pass')) + (-1, [3, 1], False) + >>> world.step(Action('eat')) + (1, [0, 0], True) + >>> world.step(Action('eat')) # try eating again, notice reward change + (-1, [0, 0], True) + >>> # trial 2 + >>> world.reset() + >>> world.step(Action('investigate')) + (-1, [3, 1], False) + >>> world.step(Action('pass')) + (-1, [0, 1], False) + >>> world.step(Action('eat')) + (1, [0, 0], False) + >>> world.step(Action('pass')) + (-1, [3, 1], False) + >>> world.step(Action('eat')) + (1, [0, 0], True) + ''' + +if __name__ == '__main__': + '''test important functions and workflows with doctesting + run this python file by itself to run these tests, and set + LOGGING=True near top of file.''' + import doctest + from functools import partial + test = partial(doctest.run_docstring_examples, globs = globals()) + test(getObservationSpace) 
+ test(getActionSpace) + test(_test_world) diff --git a/code/evolve.py b/code/evolve.py new file mode 100644 index 0000000..00ec29d --- /dev/null +++ b/code/evolve.py @@ -0,0 +1,76 @@ +import random +from deap import creator, base, tools, algorithms +import numpy as np + +creator.create("FitnessMax", base.Fitness, weights=(1.0,)) +creator.create("Individual", np.ndarray, fitness=creator.FitnessMax) + +toolbox = base.Toolbox() + +#toolbox.register("attr_bool", random.randint, 0, 1) # non-numpy non-float version +toolbox.register("attr_float", random.random) +toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, n=100) +toolbox.register("population", tools.initRepeat, list, toolbox.individual) + +def linearFitness(individual): + '''selection pressure for genome values to be numpy.arange(start=0.0, stop=1.0, step=1/len(genome))''' + import numpy as np + a = np.arange(0, 1, 1.0/len(individual)) + b = np.array(individual) + return 1.0-np.sum(np.abs(a-b))/(len(individual)*0.5), + +def cxTwoPointCopy(ind1, ind2): + """Execute a two points crossover with copy on the input individuals. The + copy is required because the slicing in numpy returns a view of the data, + which leads to a self overwriting in the swap operation. It prevents + :: + >>> import numpy as np + >>> a = np.array((1,2,3,4)) + >>> b = np.array((5,6,7,8)) + >>> a[1:3], b[1:3] = b[1:3], a[1:3] + >>> print(a) + [1 6 7 4] + >>> print(b) + [5 6 7 8] + """ + size = len(ind1) + cxpoint1 = random.randint(1, size) + cxpoint2 = random.randint(1, size - 1) + if cxpoint2 >= cxpoint1: + cxpoint2 += 1 + else: # Swap the two cx points + cxpoint1, cxpoint2 = cxpoint2, cxpoint1 + ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = ind2[cxpoint1:cxpoint2].copy(), ind1[cxpoint1:cxpoint2].copy() + return ind1, ind2 + +toolbox.register("evaluate", linearFitness) +#toolbox.register("mate", tools.cxTwoPoint) # non-numpy non-float version +toolbox.register("mate", cxTwoPointCopy) +#toolbox.register("mutate", tools.mutFlipBit, indpb=0.05) # non-numpy non-float version +toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=0.2, indpb=0.05) +toolbox.register("select", tools.selTournament, tournsize=3) + +# evolution loop +population = toolbox.population(n=100) +NGEN=500 +for gen in range(NGEN): + offspring = algorithms.varAnd(population, toolbox, cxpb=0.5, mutpb=0.1) + # constrain genome values to [0,1] + for offspring_i,individual in enumerate(offspring): + np.clip(np.array(offspring[offspring_i]), 0.0, 1.0) + # Evaluate the individuals with an invalid fitness (not yet evaluated) + invalid_ind = [ind for ind in offspring if not ind.fitness.valid] + fitnesses = toolbox.map(toolbox.evaluate, invalid_ind) + for ind, fit in zip(invalid_ind, fitnesses): + ind.fitness.values = fit + population = toolbox.select(offspring, k=len(population)) + +# post-evolution analysis +fitnesses = toolbox.map(toolbox.evaluate, population) +sortedFitnesses = sorted(fitnesses) + +bestFitness, worstFitness = sortedFitnesses[0], sortedFitnesses[-1] +print(bestFitness, worstFitness) + +bestGenome = tools.selBest(population, k=1) +print(bestGenome) diff --git a/code/exp1.py b/code/exp1.py new file mode 100755 index 0000000..cb02174 --- /dev/null +++ b/code/exp1.py @@ -0,0 +1,333 @@ +""" +exp1.py - instance of use of 'experiment.py' + +Tasks: + + - Consider how to have a changing schedule of stimulus presentation + +Need to have something where we can see evolution producing a trait that +would indicate interest in new things in the environment. 
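One note on the evolve.py loop above: np.clip returns a new array rather than modifying its argument, so the clipped values have to be written back into each individual for the [0, 1] genome constraint to take effect. A minimal helper sketch (not from this commit) that does the write-back while keeping the DEAP Individual type and its fitness attribute:

```
import numpy as np

def clip_population(offspring, low=0.0, high=1.0):
    """Clip every genome in place; a bare np.clip call returns a copy that would be discarded."""
    for ind in offspring:
        ind[:] = np.clip(ind, low, high)   # slice-assign so `ind` stays a creator.Individual
    return offspring
```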
Sets up conditions +where curiosity could be advantageous. + +Conveyor belt needs to have the ability to introduce new things. + +Single factor shift to start -- color of the thing ? +The introduction of novelty is the main thing, where the novelty is +associated with fitness advantage. + +Simple systems to test + - constant environment + - switch between two different environments + - frequency of shift makes a difference + - Goldilocks zone for intermediate frequency +Controlled randomization + - Known low-payoff 'food' in environment + - Better thing has a cue + - Changing frequency of presentation + - Constant + - Ramp + - Cycle + - 'Green' could indicate better but + - x factor for better could be changed + +For all of these, we can test unseen (novel) stimuli + - Generalization can be tested + - Cue of goodness + - Proportion of time novel stimulus are rewarding + - Must be a proportion to introduce unpredictability + +One hypothesis: unpredictability between cues and rewards may lead to curiosity + - Evolutionary timescale of unpredictability + - Predictable lifetime + +Push current code to repository. + + +""" + + +import sys +# allow importing from the 'code/' dir +sys.path.append("../code") + +import os +import platform +import pickle +import json +import traceback +import datetime +import copy + +import numpy as np # , itertools, copy +import matplotlib.pyplot as plt +from collections import defaultdict +import importlib # module reloading + +import environments +import agents + +# always forces a reload in case you have edited environments or agents +importlib.reload(environments) +importlib.reload(agents) +#from environments.gridworld import GridWorld +import environments.puzzle as pz +from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace + +from agents.q_agent import EvolvableAgent as Agent + +# DEAP imports + +import random +from deap import creator, base, tools, algorithms + +import multiprocessing + +#pool = multiprocessing.Pool() +#toolbox.register("map", pool.map) + +# Weight handling +from mda import MultiDimArray + +# RESS +from ress import RESS + +# EvolveWeights +# from ew import EvolveWeights +from curio_evolve_weights import EvolveWeights + +# Experiment +from experiment import Experiment + +def isotime(): + return datetime.datetime.now().isoformat() + +def t2fn(timestamp): + timestamp = timestamp.replace('.','_') + timestamp = timestamp.replace(':','_') + return timestamp + +class Holder(object): + """ + A general class for the equivalent of a digital duffle bag, each instance + can have essentially whatever you want stuffed into it. + + This is essentially the very opposite of defining classes with the + __slots__ convention, leaving the contents entirely open. + + I've found this useful for making context objects. If I am careful, + the whole object can be serialized to disk and loaded later. + """ + def __init__(self): + pass + +""" +Probability of reward at all +Probability of strength of reward + +Variances: + - How many puzzle cues do we have? + - How often does a puzzle appear in training? + - How often does a puzzle appear across evolutionary time? + - How much reward does solving a puzzle deliver? 
+ +Two things , green | red + green good + red bad + +Outcomes + - Too unlikely -> no behavior to examine + - Entirely predictable + - In between -> curiosity has advantage + + First sample from uniform distribution to determine reward (0.5) + Second : strngth of reward in conjunction with probability of reward (small freq but large reward, etc.) + +Spot or range where it becomes advantageous to evolve a curiosity module... + + +Figuring out a representation that allows all the flexibility we discussed... + + "puzzles": [ + { + "puzzle_description": "Appetitive puzzle", + "tt": [[0,0,2], # state 0: first presentation + [0,0,0], # state 1: getting passed over (placeholder) + [2,2,2]], # state 2: consumed (saturating) + "features": [[2], # state 0: Green + [2], # state 1: Green (placeholder) + [0]], # state 2: Empty/Unknown (after being eaten) + "rewards": [ + [-1, # state 0: first look + -1, # state 1: proceeding to next puzzle (placeholder) + 1, # state 2: consume (reward) + 0.5 # Proportion + ], + [-1, # state 0: first look + -1, # state 1: proceeding to next puzzle (placeholder) + -1, # state 2: consume (punishment) + 0.5 # Proportion + ], + ] + }, + { + "puzzle_description": "Aversive puzzle", + "tt": [[0,0,2], # state 0: first presentation + [0,0,0], # state 1: getting passed over (placeholder) + [2,2,2]], # state 2: consumed (saturating)], + "features": [[1], # state 0: Red + [1], # state 1: Red (placeholder) + [0]], # state 2: Empty/Unknown (after being eaten) + "rewards": [-1, # state 0: first look + -1, # state 1: proceeding to next puzzle (placeholder) + -2], # state 2: consume (punishment) + }, + + +""" + +def exp1_environment(*args, **kwargs): + + unambiguous_puzzle_spec = { + "puzzle_set_description": "Unambiguous puzzle set with 1 good, 1 bad puzzle", + "puzzles": [ + { + "puzzle_description": "Appetitive puzzle", + "tt": [[0,0,2], # state 0: first presentation + [0,0,0], # state 1: getting passed over (placeholder) + [2,2,2]], # state 2: consumed (saturating) + "features": [[2], # state 0: Green + [2], # state 1: Green (placeholder) + [0]], # state 2: Empty/Unknown (after being eaten) + "rewards": [-1, # state 0: first look + -1, # state 1: proceeding to next puzzle (placeholder) + 1], # state 2: consume (reward) + }, + { + "puzzle_description": "Aversive puzzle", + "tt": [[0,0,2], # state 0: first presentation + [0,0,0], # state 1: getting passed over (placeholder) + [2,2,2]], # state 2: consumed (saturating)], + "features": [[1], # state 0: Red + [1], # state 1: Red (placeholder) + [0]], # state 2: Empty/Unknown (after being eaten) + "rewards": [-1, # state 0: first look + -1, # state 1: proceeding to next puzzle (placeholder) + -2], # state 2: consume (punishment) + }, + ] + } + + ambiguous_puzzle_spec = { + "puzzle_set_description": "Ambiguous puzzle set with 1 good, 1 bad puzzle.", + "puzzles": [ + { + "puzzle_description": "Appetitive puzzle", + "tt": [[0,0,2], # state 0: first presentation + [0,0,0], # state 1: getting passed over (placeholder) + [2,2,2]], # state 2: consumed (saturating) + "features": [[1], # state 0: Red + [1], # state 1: Red (placeholder) + [0]], # state 2: Empty/Unknown (after being eaten) + "rewards": [-1, # state 0: first look + -1, # state 1: proceeding to next puzzle (placeholder) + 1], # state 2: consume (reward) + }, + { + "puzzle_description": "Aversive puzzle", + "tt": [[0,0,2], # state 0: first presentation + [0,0,0], # state 1: getting passed over (placeholder) + [2,2,2]], # state 2: consumed (saturating)], + "features": [[1], # state 0: 
Red + [1], # state 1: Red (placeholder) + [0]], # state 2: Empty/Unknown (after being eaten) + "rewards": [-1, # state 0: first look + -1, # state 1: proceeding to next puzzle (placeholder) + -2], # state 2: consume (punishment) + }, + ] + } + # Notion: Have an object to define a schedule of presentation of + # environments, with the ability to stochastically present one of + # a list of environments. + exp_schedule = { + "setlist": [ + { + "desc": "Initial puzzle set", + "specs": [unambiguous_puzzle_spec], + "turns": 50, + "num_stimuli": 6, + "sequence_type": "fixed", + "probs": [[1.0], [1.0]] + }, + { + "desc": "Stochastic puzzle sets", + "specs": [unambiguous_puzzle_spec, ambiguous_puzzle_spec], + "turns": 200, + "num_stimuli": 6, + "sequence_type": "stochastic", + "probs": [[1.0, 0.0], [0.0, 1.0]] + }, + ] + } + + if 'num_puzzles_on_belt' in kwargs: + num_puzzles_on_belt = 6 + + pz = unambiguous_puzzle_spec + if (1): + maxrewards = [1] + # Produce Gellermann sequence + upress = RESS() + print(dir(upress)) + print(pz['puzzles']) + print(len(pz['puzzles'])) + upseries = upress.newress(num_puzzles_on_belt, len(pz['puzzles'])) + print("upseries", upseries) + # Create puzzle sequence + # Instantiate puzzles per Gellermann sequence + puzzles = [] + for stimi in upseries: + + stimn = int(stimi) + myp = Puzzle(tt=np.array(pz['puzzles'][stimn]['tt']), + features=pz['puzzles'][stimn]['features'], + rewards=pz['puzzles'][stimn]['rewards'] + ) + puzzles.append(myp) + # Create conveyor belt + world = ConvBelt(actionSpace = getActionSpace(puzzles), + observationSpace = getObservationSpace(puzzles), + maxRewards = maxrewards, + agentclass=Agent, + randomize = False, alpha=0.005) + # Add puzzles + for pi in puzzles: + world.append(pi) + + return world + +def do_experiment(): + # Experiment instance + myexp = Experiment() + myexp.set_agentclass(Agent) + myexp.set_environclass(ConvBelt) + myexp.set_evolverclass(EvolveWeights) + myexp.set_evolver_attributes() # defaults + myexp.set_environ_maker(exp1_environment) # sets function + myexp.make_environ() # Calls function + myexp.make_evolver_instance() + if myexp.validate(): + myexp.evolver.driver() + else: + print("Experiment failed to validate.") + + + +if __name__ == "__main__": + + print("exp1.py start...") + + + do_experiment() + + print("exp1.py done.") diff --git a/code/experiment.py b/code/experiment.py new file mode 100755 index 0000000..5530577 --- /dev/null +++ b/code/experiment.py @@ -0,0 +1,185 @@ +""" +experiment.py + +Curiosity project Experiment class definition. + +Aim for better encapsulation. + +Experiment class + - This class should get the various classes to use in running an experiment + - EvolveWeights + - mda? + - Environ (GridWorld, ConvBelt, Puzzle) + - Still is going to require ad hoc function to create the particular Environ + - But could pass in function to use + - Agentclass + - And experimental attributes + - For example + - Experiment constructs EW instance, passes in weight length + - Experiment constructs Environ instance + - Experiment requests evolution run of EW with parameters + - EW calls Experiment for each evaluation of an individual (and in what generation) + - Experiment calls Environ.evaluate with individual weights, agentclass + - Passes w, tuple back to EW + +""" + +import sys +import os +import traceback + +class Holder(object): + def __init__(self): + pass + +class Experiment(object): + """ + Experiment class. Instances will drive reinforcement learning experiments. 
+ + + """ + + def __init__(self): + self.agentclass = None + self.environclass = None + self.evolverclass = None + self.environmaker = None + pass + + def validate(self): + valid = True + # Test that we have classes to use + valid = valid and (not self.agentclass in [None]) + valid = valid and (not self.environclass in [None]) + valid = valid and (not self.evolverclass in [None]) + # Test other values here + return valid + + def set_schedule(self, schedule): + self.schedule = schedule + + def set_environ_maker(self, environmaker): + self.environmaker = environmaker + + def make_environ(self): + if not self.environmaker in [None]: + try: + self.environ = self.environmaker() + except: + estr = f"Error: traceback.format_exc()" + print(estr) + self.environ = None + + def set_agentclass(self, agentclass): + # Test class for compatibility + okclass = True + # No test yet + + if okclass: + self.agentclass = agentclass + + def get_agentclass(self): + return self.agentclass + + def set_environclass(self, environclass): + # Test class for compatibility + okclass = True + + if not 'evaluate' in dir(environclass): + okclass = False + print("set_environclass error: class does not provide 'evaluate'") + + if okclass: + self.environclass = environclass + + def get_environclass(self): + return self.environclass + + def set_evolverclass(self, evolverclass): + # Test class for compatibility + okclass = True + + if not 'driver' in dir(evolverclass): + okclass = False + print("set_evolverclass error: class does not provide 'driver'") + + if okclass: + self.evolverclass = evolverclass + + def set_agent_attributes(self, alpha=0.005): + self.agent_props = Holder() + self.agent_props.alpha = 0.005 + + def set_evolver_attributes(self, + popsize=100, + maxgenerations=10000, + cxpb=0.5, + mtpb=0.05, + wmin=-20.0, + wmax=20.0, + mut_center=0.0, + mut_sigma=0.1, + mut_indpb=0.05, + tournsize=5, + tournk=2, + normalize_fitness=True, + tag='environ' + ): + self.evolver_props = Holder() + self.evolver_props.popsize = popsize + self.evolver_props.maxgenerations = maxgenerations + self.evolver_props.cxpb = cxpb + self.evolver_props.mtpb = mtpb + self.evolver_props.wmin = wmin + self.evolver_props.wmax = wmax + self.evolver_props.mut_center = mut_center + self.evolver_props.mut_sigma = mut_sigma + self.evolver_props.mut_indpb = mut_indpb + self.evolver_props.tournsize = tournsize + self.evolver_props.tournk = tournk + self.evolver_props.normalize_fitness = normalize_fitness + self.evolver_props.tag = tag + + def make_evolver_instance(self): + self.evolver = self.evolverclass( + self.environclass, + popsize=self.evolver_props.popsize, + maxgenerations=self.evolver_props.maxgenerations, + cxpb=self.evolver_props.cxpb, + mtpb=self.evolver_props.mtpb, + wmin=self.evolver_props.wmin, + wmax=self.evolver_props.wmax, + mut_center= self.evolver_props.mut_center, + mut_sigma= self.evolver_props.mut_sigma, + mut_indpb= self.evolver_props.mut_indpb, + tournsize= self.evolver_props.tournsize, + tournk= self.evolver_props.tournk, + normalize_fitness= self.evolver_props.normalize_fitness, + tag= self.evolver_props.tag + ) + + def set_env_attributes(self): + self.env_props = Holder() + + def handle_evaluation(self, ind, generation): + """ + evolver calls this to get an evaluation of an + individual. + + Depending on the experiment schedule and generation, + this may require constructing a new environment. 
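handle_evaluation() is still a stub in this commit. Purely as a sketch of the intent described above, the following assumes the schedule follows the exp1.py setlist layout (entries carrying a "turns" count, read here as generations) and that the registered environmaker can rebuild the environment; both are assumptions, not the committed API:

```
# Sketch only: the schedule semantics ("turns" == generations) and the use of
# self.environmaker() per entry are assumptions, not part of this commit.
def handle_evaluation(self, ind, generation):
    entry = None
    if getattr(self, "schedule", None):
        boundary = 0
        for candidate in self.schedule["setlist"]:
            boundary += candidate["turns"]
            if generation < boundary:
                entry = candidate
                break
        if entry is None:
            entry = self.schedule["setlist"][-1]   # past the schedule: keep the last set
    if entry is not None and entry is not getattr(self, "_current_entry", None):
        self._current_entry = entry
        self.environ = self.environmaker()         # could also pass entry["specs"]
    # Delegate the fitness evaluation; the environment returns the (w,) tuple.
    return self.environ.evaluate(ind)
```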
+ """ + pass + + def run_experiment(self): + """ + # Run experiment + ew = EvolveWeights(world, + popsize=100, + maxgenerations=1000, + tournsize=75, + tournk=3, + normalize_fitness=False) + ew.driver() + """ + diff --git a/code/gwe.py b/code/gwe.py new file mode 100644 index 0000000..7b7e1dc --- /dev/null +++ b/code/gwe.py @@ -0,0 +1,438 @@ +""" +gwe.py -- GridWorld Evolving + +Bringing together an Agent acting in GridWorld with +DEAP evolutionary computation. + +Notion: Set up for being able to call an Agent with +a provided set of weights and run their training in +a Gridworld environment. DEAP keeps a population of +weights and handles the evolutionary computation. +Save the best instantiated Agent per each generation +for later review and analysis. +""" +import sys +# allow importing from the 'code/' dir +sys.path.append("../code") + +import os +import platform +import pickle +import json +import traceback +import datetime + +import numpy as np, itertools, copy +import matplotlib.pyplot as plt +from collections import defaultdict +import importlib # module reloading + +import environments +import agents + +# always forces a reload in case you have edited environments or agents +importlib.reload(environments) +importlib.reload(agents) +from environments.gridworld import GridWorld +from agents.q_agent import EvolvableAgent as Agent + +# DEAP imports + +import random +from deap import creator, base, tools, algorithms + +import multiprocessing + +#pool = multiprocessing.Pool() +#toolbox.register("map", pool.map) + +# Weight handling +from mda import MultiDimArray + +def isotime(): + return datetime.datetime.now().isoformat() + +def t2fn(timestamp): + timestamp = timestamp.replace('.','_') + timestamp = timestamp.replace(':','_') + return timestamp + +class Holder(object): + def __init__(self): + pass + +class GoalsAndHolesWorld(object): + """ + Class for making and using a 2D GridWorld based on + setting goals and holes (hazards) for an RL Agent + to explore. 
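Concretely, the constructor takes (obsSpace, actSpace, goals, holes, startstate, agentclass, ...), expands the holes row/column ranges into individual hazard cells, and exposes get_weights_len() = prod(obsSpace + actSpace) for the evolver. A short sketch mirroring the holes_block_direct_route() setup defined later in this file (it assumes gwe.py's own imports for GoalsAndHolesWorld and Agent):

```
# Sketch, assuming gwe.py's own GoalsAndHolesWorld and Agent imports.
import numpy as np

world = GoalsAndHolesWorld(
    obsSpace=(4, 12),           # 4 x 12 grid
    actSpace=(4,),              # four movement actions
    goals=(3, 11),              # goal cell in the bottom-right corner
    holes=[[3, 3], [1, 10]],    # holes at row 3, columns 1..10
    startstate=(3, 0),          # start in the bottom-left corner
    agentclass=Agent,
    max_steps=200,
)

# One evolvable weight per (observation, action) feature:
assert world.get_weights_len() == np.prod((4, 12) + (4,))   # 4 * 12 * 4 = 192
```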
+ + """ + def __init__(self, obsSpace, actSpace, goals, holes, startstate, agentclass, + killed_reward=-10.0, max_training_trials=50, max_steps=32, + alpha=0.01, gamma=0.95, epsilon=0.01, lmbda=0.42 + ): + self.obsSpace = tuple(obsSpace) + self.actSpace = tuple(actSpace) + self.goals = list(goals) + self.holes = tuple(holes) + self.startState = tuple(startstate) + self.agentclass = agentclass + self.killed_reward = killed_reward + self.max_training_trials = max_training_trials + self.max_steps = max_steps + self.alpha = alpha + self.gamma = gamma + self.epsilon = epsilon + self.lmbda = lmbda + self.env = self.make_env(self.startState, self.obsSpace, self.goals, self.holes) + print("Goals from env", self.env.goals) + pass + + def get_weights_len(self): + mywl = np.prod(tuple(self.obsSpace) + tuple(self.actSpace)) + return mywl + + def make_env(self, startstate=None, dims=None, goals=None, holes=None): + if startstate in [None]: + startstate = self.startState + if dims in [None]: + dims = self.obsSpace + if goals in [None]: + goals = list(self.goals) + if holes in [None]: + holes = self.holes + print(startstate, dims, goals, holes) + myenv = GridWorld(dims = dims, startState = startstate) + myenv.goals.append(goals) + for ii in range(holes[0][0], holes[0][1]+1): + for jj in range(holes[1][0], holes[1][1]+1): + print("adding hole at ", ii, jj) + myenv.holes.append([ii,jj]) + return myenv + + def run_trial(self, agent, env=None): + if env in [None]: + env = self.env + agent.reset() # soft-reset() (keeps learned weights) + nextState = env.reset() + lastState = nextState + runtime = 0 + while True: + runtime += 1 + status = 'alive' + # set agent senses based on environment and allow agent to determine an action + agent.sensoryState = nextState + agent.plasticUpdate() + # determine effect on environment state & any reward (in standard openAI-gym API format) + nextState, reward, goal_achieved, _ = env.step(agent.action) + #if (tuple(lastState) == tuple(self.env.goals)) or (tuple(nextState) == tuple(self.env.goals)): + # print(agent.action, lastState, reward, goal_achieved, nextState) + lastState = nextState + agent.reward = reward + if goal_achieved or (runtime >= self.max_steps): break + # stop trial if agent explitly failed early + elif reward <= self.killed_reward: + agent.sensoryState = nextState + agent.reward = reward + agent.plasticUpdate() # allow 1 more update to 'learn' the bad reward + agent.reset() + nextState = env.reset() + status = 'killed' + runtime = self.max_steps + break + # print(time, agent.action, agent.reward, status) + #print(" runtime", runtime) + #if goal_achieved: + # print(" Goal Achieved!!!") + return agent, runtime + + def evaluate(self, ind, return_agent=False): + """ + + """ + latest = 20 + # Pull weights from ind + # Instantiate an Agent + myagent = Agent(obsSpace=self.obsSpace, actSpace=self.actSpace, alpha=self.alpha, gamma=self.gamma, epsilon=self.epsilon, lmbda=self.lmbda) + # Put weights in the Agent + myagent.weights = [x for x in ind] + #print(" myagent.weights", myagent.weights) + # run_trial calls + time_to_solve_each_trial = [] # lower is better + for trialN in range(self.max_training_trials): + # some output to see it running + # if (trialN % 10) == 0: print('.',end='') + myagent, runtime = self.run_trial(myagent) + # record trial results + time_to_solve_each_trial.append(runtime) + #print(" tts", time_to_solve_each_trial) + # calculate fitness + # Fitness is 1 - (avg. tts / max. 
time) + # w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial) / self.max_steps)) + ltts = len(time_to_solve_each_trial) + latest = ltts // 2 + # Latter half of steps + #w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial[-latest:]) / self.max_steps)) + # First half of steps + w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial[:-latest]) / self.max_steps)) + # return the fitness + #print(" fitness", "%3.2f" % w) + #print(" myagent.weights after", myagent.weights) + if return_agent: + return myagent, w, time_to_solve_each_trial + else: + return w, + + +class MaxAve(object): + def __init__(self, alpha=0.1): + self.alpha = alpha + pass + + def get_weights_len(self, wl=100): + return wl + + def evaluate(self, ind): + npwts = np.array([x for x in ind]) + wtmax = np.max(np.abs(npwts)) + wtmean = np.mean(np.abs(npwts)) + if 0.0 != wtmax: + w = wtmean / wtmax + else: + w = 0.0 + return w, + +class EvolveWeights(object): + """ + Class to apply DEAP to evolve a population consisting of a set + of weights. + """ + + def __init__(self, gahw, + popsize=100, maxgenerations=10000, + cxpb=0.5, mtpb=0.05, + wmin=-20.0, wmax=20.0, + mut_center=0.0, mut_sigma=0.1, mut_indpb=0.05, + tournsize=5, + tournk=2, + normalize_fitness=True, + tag='gahw' + ): + self.tag = tag + self.starttime = isotime() + self.logbase = tag + "_" + t2fn(self.starttime) + + self.gahw = gahw + self.weights_len = gahw.get_weights_len() + + self.popsize = popsize + self.maxgenerations = maxgenerations + self.cxpb = cxpb + self.mtpb = mtpb + self.wmin = wmin + self.wmax = wmax + self.mut_center = mut_center + self.mut_sigma = mut_sigma + self.mut_indpb = mut_indpb + self.tournsize = tournsize + self.tournk = tournk + self.normalize_fitness = normalize_fitness + pass + + def masv(self, pop): + mav = [] + maxs = [] + for ind in pop: + wts = [x for x in ind] + mav.append(np.mean(np.abs(wts))) + maxs.append(np.max(np.abs(wts))) + allmax = np.max(maxs) + mymasv = [x/allmax for x in mav] + return mymasv + + def cxTwoPointCopy(self, ind1, ind2): + """Execute a two points crossover with copy on the input individuals. The + copy is required because the slicing in numpy returns a view of the data, + which leads to a self overwriting in the swap operation. It prevents + :: + >>> import numpy as np + >>> a = np.array((1,2,3,4)) + >>> b = np.array((5,6,7,8)) + >>> a[1:3], b[1:3] = b[1:3], a[1:3] + >>> print(a) + [1 6 7 4] + >>> print(b) + [5 6 7 8] + """ + size = len(ind1) + cxpoint1 = random.randint(1, size) + cxpoint2 = random.randint(1, size - 1) + if cxpoint2 >= cxpoint1: + cxpoint2 += 1 + else: # Swap the two cx points + cxpoint1, cxpoint2 = cxpoint2, cxpoint1 + ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = ind2[cxpoint1:cxpoint2].copy(), ind1[cxpoint1:cxpoint2].copy() + return ind1, ind2 + + def zero(self): + return 0.0 + + def smallrandom(self, eps=None): + """ + Produce a small random number in [-eps .. eps]. + + A random variate in [-1 .. 1] is produced then + multiplied by eps, so the final range is in [-eps .. eps]. 
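MaxAve above is a stand-alone smoke-test fitness (exercised by the maxave() helper at the bottom of this file): it scores a genome by the ratio of its mean absolute weight to its maximum absolute weight. A tiny worked example, assuming the class as defined above:

```
ma = MaxAve()                          # MaxAve as defined above in gwe.py
w, = ma.evaluate([0.5, -1.0, 0.25])    # mean(|w|) / max(|w|) = (1.75 / 3) / 1.0
print(round(w, 3))                     # -> 0.583
```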
+ + """ + if eps in [None]: + eps = self.gahw.alpha + rv = ((2.0 * random.random()) - 1.0) * eps + return rv + + def setup(self): + creator.create("FitnessMax", base.Fitness, weights=(1.0,)) + creator.create("Individual", np.ndarray, fitness=creator.FitnessMax) + + self.toolbox = base.Toolbox() + + self.pool = multiprocessing.Pool() + self.toolbox.register("map", self.pool.map) + + #toolbox.register("attr_bool", random.randint, 0, 1) # non-numpy non-float version + # self.toolbox.register("attr_float", random.random) + #self.toolbox.register("attr_float", self.zero) + self.toolbox.register("attr_float", self.smallrandom) + + self.toolbox.register("individual", tools.initRepeat, creator.Individual, self.toolbox.attr_float, n=self.weights_len) + self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual) + + self.toolbox.register("evaluate", self.gahw.evaluate) + #toolbox.register("mate", tools.cxTwoPoint) # non-numpy non-float version + self.toolbox.register("mate", self.cxTwoPointCopy) + #toolbox.register("mutate", tools.mutFlipBit, indpb=0.05) # non-numpy non-float version + self.toolbox.register("mutate", tools.mutGaussian, mu=self.mut_center, sigma=self.mut_sigma, indpb=self.mut_indpb) + self.toolbox.register("select", tools.selTournament, tournsize=self.tournsize, k=self.tournk) + + def normalize_fitnesses(self, fitnesses): + #print("fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + maxfitness = np.max([x[0] for x in fitnesses]) + #print("maxfitness", maxfitness) + listfit = [x[0] for x in fitnesses] + #print("listfit", listfit) + normfit = [x/maxfitness for x in listfit] + #print("normfit", normfit) + fitnesses = [tuple([x]) for x in normfit] + #print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + return fitnesses + + def log_it(self, generation): + pool = self.pool + toolbox = self.toolbox + self.pool = None + self.toolbox = None + pklfn = f"{self.logbase}__{generation+1}-{self.maxgenerations}.pkl" + pickle.dump(self, open(pklfn, "wb")) + self.pool = pool + self.toolbox = toolbox + + def loop(self): + self.population = self.toolbox.population(n=self.popsize) + #print(self.masv(self.population)) + NGEN=self.maxgenerations + for gen in range(NGEN): + print("generation", gen) + offspring = algorithms.varAnd(self.population, self.toolbox, cxpb=self.cxpb, mutpb=self.mtpb) + # print("offspring", offspring) + # constrain genome values to [0,1] + for offspring_i,individual in enumerate(offspring): + np.clip(np.array(offspring[offspring_i]), self.wmin, self.wmax) + # print("clipped offspring", offspring) + # Evaluate the individuals with an invalid fitness (not yet evaluated) + # print("check fitness.valid") + invalid_ind = [ind for ind in offspring if not ind.fitness.valid] + # print("invalid_ind", len(invalid_ind)) + #print("setting fitness") + fitnesses = self.toolbox.map(self.toolbox.evaluate, invalid_ind) + if self.normalize_fitness: + fitnesses = self.normalize_fitnesses(fitnesses) + """ + #print("fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + maxfitness = np.max([x[0] for x in fitnesses]) + #print("maxfitness", maxfitness) + listfit = [x[0] for x in fitnesses] + #print("listfit", listfit) + normfit = [x/maxfitness for x in listfit] + #print("normfit", normfit) + fitnesses = [tuple([x]) for x in normfit] + #print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + """ + # print("fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + self.fitness_dist(fitnesses) + # print("update ind fitness") + for ind, fit in zip(invalid_ind, fitnesses): + 
ind.fitness.values = fit + #print("selection") + #print("offspring\n", self.masv(offspring)) + self.offspring = offspring + self.population = self.toolbox.select(offspring, k=len(self.population)) + if 0 == gen % 100: + self.log_it(gen) + + #print("population after selection\n", self.masv(self.population)) + #print("Report for generation", gen) + self.report() + + def report(self): + # post-evolution analysis + fitnesses = self.toolbox.map(self.toolbox.evaluate, self.population) + if self.normalize_fitness: + fitnesses = self.normalize_fitnesses(fitnesses) + self.fitnesses = fitnesses + self.sortedFitnesses = sorted(fitnesses) + self.sortedFitnesses.reverse() + self.fitness_dist(fitnesses) + + self.bestFitness, self.worstFitness = self.sortedFitnesses[0], self.sortedFitnesses[-1] + print("best/worst w", self.bestFitness, self.worstFitness) + + self.bestGenome = tools.selBest(self.population, k=1) + # print(self.bestGenome) + + def ffmt(self, value, fmt="%3.2f"): + return fmt % value + + def fitness_dist(self, fitnesses): + listfit = [x[0] for x in fitnesses] + pct05, pct25, pct50, pct75, pct95 = np.percentile(listfit, [0.05, 0.25, 0.5, 0.75, 0.95]) + print(f"fitness dist: {self.ffmt(np.min(listfit))} {self.ffmt(pct05)} {self.ffmt(pct25)} {self.ffmt(pct50)} {self.ffmt(pct75)} {self.ffmt(pct95)} {self.ffmt(np.max(listfit))}") + + def driver(self): + # Initialize + self.setup() + # Generation loop + self.loop() + # Report + self.report() + self.log_it(self.maxgenerations) + print(self.masv(self.population)) + pass + +def holes_block_direct_route(): + # GridWorld as in 'gridworld.ipynb' + gahw = GoalsAndHolesWorld((4,12), (4,), (3,11), [[3,3],[1,10]], (3,0), Agent, max_steps=200) + ew = EvolveWeights(gahw, popsize=100, maxgenerations=10000, tournsize=100, tournk=2, normalize_fitness=False) + ew.driver() + +def maxave(): + ma = MaxAve() + ew = EvolveWeights(ma, popsize = 100, maxgenerations=100) + ew.driver() + +if __name__ == "__main__": + + holes_block_direct_route() + # maxave() + + pass diff --git a/code/mda.py b/code/mda.py new file mode 100644 index 0000000..4afb97b --- /dev/null +++ b/code/mda.py @@ -0,0 +1,85 @@ +import numpy as np +from typing import Any, Union, List, Tuple + +class MultiDimArray: + """ + A class to represent and manipulate multi-dimensional arrays. + + Attributes + ---------- + mdary : numpy.ndarray + A multi-dimensional array containing the input data. + shape : tuple + The shape of the input multi-dimensional array. + + Methods + ------- + flatten(output_type="list") -> Union[List, Tuple, np.ndarray]: + Returns the flattened version of the multi-dimensional array as a list, tuple, or Numpy array. + + foldout(vector, output_type="list") -> Union[List, Tuple, np.ndarray]: + Reshapes a 1D vector back into the original shape of the multi-dimensional array, + and returns it as a list, tuple, or Numpy array. + """ + def __init__(self, mdary: Union[List, Tuple, np.ndarray]): + self.mdary = np.array(mdary) + self.shape = self.mdary.shape + + def flatten(self, output_type: str = "list") -> Union[List, Tuple, np.ndarray]: + """ + Flatten the multi-dimensional array. + + Parameters + ---------- + output_type : str, optional + The output type of the flattened array, either 'list', 'tuple', or 'numpy' (default is 'list'). 
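EvolveWeights.log_it() above pickles the whole evolver every 100 generations (with the multiprocessing pool and DEAP toolbox temporarily detached), so a run can be reloaded later for analysis. A sketch of reloading such a checkpoint; the filename is hypothetical, and the DEAP creator classes must exist before the numpy-backed individuals can be unpickled:

```
import pickle
import numpy as np
from deap import base, creator, tools

# Recreate the classes the pickled population refers to (gwe.py must also be
# importable so pickle can resolve the EvolveWeights / GoalsAndHolesWorld classes).
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", np.ndarray, fitness=creator.FitnessMax)

# Hypothetical name; real checkpoints follow f"{tag}_{starttime}__{gen+1}-{maxgen}.pkl"
with open("gahw_2024-01-01T00_00_00__101-10000.pkl", "rb") as fh:
    ew = pickle.load(fh)

best = tools.selBest(ew.population, k=1)[0]
print(len(ew.population), np.asarray(best).shape)
# ew.pool and ew.toolbox were nulled before pickling; call ew.setup() before
# re-running any evaluations.
```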
+ + Returns + ------- + Union[List, Tuple, np.ndarray] + The flattened version of the multi-dimensional array in the specified output + """ + flat_array = self.mdary.flatten() + + if output_type == "list": + return flat_array.tolist() + elif output_type == "tuple": + return tuple(flat_array) + elif output_type == "numpy": + return flat_array + else: + raise ValueError("Invalid output_type. Choose 'list', 'tuple', or 'numpy'") + + def foldout(self, vector: Union[List, Tuple, np.ndarray], output_type: str = "list") -> Union[List, Tuple, np.ndarray]: + if len(vector) != self.mdary.size: + raise ValueError("The input vector must have the same length as the flattened form of the multi-dimensional array") + + reshaped_array = np.reshape(vector, self.shape) + + if output_type == "list": + return reshaped_array.tolist() + elif output_type == "tuple": + return tuple(map(tuple, reshaped_array)) + elif output_type == "numpy": + return reshaped_array + else: + raise ValueError("Invalid output_type. Choose 'list', 'tuple', or 'numpy'") + +if __name__ == "__main__": + """ + Example usage: + """ + mda = MultiDimArray([[1, 2], [3, 4], [5,6]]) + #mda = MultiDimArray([1, 2, 3, 4, 5,6]) + print(f"Input array: {str(mda.mdary.tolist())}") + flat = mda.flatten(output_type="list") + print(f"Flattened array: {flat}") + + # Assuming the flat array is [1, 2, 3, 4] + folded = mda.foldout(flat, output_type="list") + print(f"Folded back array: {folded}") + + """ + The folded back array should be numerically identical to the original mdary: + [[1, 2], [3, 4]] + """ diff --git a/code/multigwe.py b/code/multigwe.py new file mode 100644 index 0000000..4bd7665 --- /dev/null +++ b/code/multigwe.py @@ -0,0 +1,568 @@ +"""multigwe.py -- Multi GridWorlds Evolving + +Bringing together an Agent acting in one of multiple GridWorlds with +DEAP evolutionary computation. + +Notion: Set up for being able to call an Agent with a provided set of +weights and run their training in one of multiple Gridworld +environments. DEAP keeps a population of weights and handles the +evolutionary computation. Save the best instantiated Agent per each +generation for later review and analysis. + +""" +import sys +# allow importing from the 'code/' dir +sys.path.append("../code") + +import os +import platform +import pickle +import json +import traceback +import datetime + +import numpy as np, itertools, copy +import matplotlib.pyplot as plt +from collections import defaultdict +import importlib # module reloading + +import environments +import agents + +# always forces a reload in case you have edited environments or agents +importlib.reload(environments) +importlib.reload(agents) +from environments.gridworld import GridWorld +from agents.q_agent import EvolvableAgent as Agent + +# DEAP imports + +import random +from deap import creator, base, tools, algorithms + +import multiprocessing + +#pool = multiprocessing.Pool() +#toolbox.register("map", pool.map) + +# Weight handling +from mda import MultiDimArray + +def isotime(): + return datetime.datetime.now().isoformat() + +def t2fn(timestamp): + timestamp = timestamp.replace('.','_') + timestamp = timestamp.replace(':','_') + return timestamp + +class Holder(object): + def __init__(self): + pass + +class GoalsAndHolesWorld(object): + """ + Class for making and using a 2D GridWorld based on + setting goals and holes (hazards) for an RL Agent + to explore. + + Modifications for multiple maps... 
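MultiDimArray is the "weight handling" helper imported by gwe.py and multigwe.py; one use is folding a flat evolved genome back into the (obsSpace + actSpace) tensor that q_agent indexes through featureToIndexMap. A sketch under that assumption (the genome values here are made up):

```
import numpy as np
from mda import MultiDimArray   # the same import used by gwe.py / multigwe.py

obs_space, act_space = (4, 12), (4,)
shape = obs_space + act_space                    # (4, 12, 4): 192 weights in total

# Stand-in for an evolved flat genome (illustrative values only).
flat_genome = list(np.random.uniform(-0.005, 0.005, size=int(np.prod(shape))))

layout = MultiDimArray(np.zeros(shape))          # records the target shape
weights = layout.foldout(flat_genome, output_type="numpy")
assert weights.shape == shape
```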
+ Need a 'maps' array + + """ + def __init__(self, obsSpace, actSpace, goals, holes, startstate, agentclass, + killed_reward=-10.0, max_training_trials=50, max_steps=32, + alpha=0.005, gamma=0.95, epsilon=0.01, lmbda=0.42 + ): + + self.maps = [] + mymap = Holder() + self.add_map(obsSpace, actSpace, goals, holes, startstate) + # Instance now has the initial map in place + + self.agentclass = agentclass + self.killed_reward = killed_reward + self.max_training_trials = max_training_trials + self.max_steps = max_steps + self.alpha = alpha + self.gamma = gamma + self.epsilon = epsilon + self.lmbda = lmbda + print("Goals from initial env", self.maps[0].env.goals) + pass + + def get_weights_len(self): + mywl = np.prod(tuple(self.maps[0].obsSpace) + tuple(self.maps[0].actSpace)) + return mywl + + def add_map(self, obsSpace, actSpace, goals, holes, startstate): + mymap = Holder() + mymap.obsSpace = tuple(obsSpace) + mymap.actSpace = tuple(actSpace) + mymap.goals = list(goals) + mymap.holes = tuple(holes) + mymap.startState = tuple(startstate) + mymap.env = self.make_env(mymap.startState, mymap.obsSpace, mymap.goals, mymap.holes) + self.maps.append(mymap) + + def make_env(self, startstate=None, dims=None, goals=None, holes=None): + # Default: the first map in the list. + if startstate in [None] and 0 < len(self.maps): + startstate = self.maps[0].startState + if dims in [None] and 0 < len(self.maps): + dims = self.maps[0].obsSpace + if goals in [None] and 0 < len(self.maps): + goals = list(self.maps[0].goals) + if holes in [None] and 0 < len(self.maps): + holes = self.maps[0].holes + print(startstate, dims, goals, holes) + myenv = GridWorld(dims = dims, startState = startstate) + myenv.goals.append(goals) + for ii in range(holes[0][0], holes[0][1]+1): + for jj in range(holes[1][0], holes[1][1]+1): + print("adding hole at ", ii, jj) + myenv.holes.append([ii,jj]) + return myenv + + def run_trial(self, agent, env=None): + if env in [None]: + # Choose an environment + """ + if 1 == len(self.maps): + mymap = self.maps[0] + else: + mymap = random.choice(self.maps) + """ + mymap = self.choose_map() + env = mymap.env + + agent.reset() # soft-reset() (keeps learned weights) + nextState = env.reset() + lastState = nextState + runtime = 0 + while True: + runtime += 1 + status = 'alive' + # set agent senses based on environment and allow agent to determine an action + agent.sensoryState = nextState + agent.plasticUpdate() + # determine effect on environment state & any reward (in standard openAI-gym API format) + nextState, reward, goal_achieved, _ = env.step(agent.action) + + #if (tuple(lastState) == tuple(self.env.goals)) or (tuple(nextState) == tuple(self.env.goals)): + # print(agent.action, lastState, reward, goal_achieved, nextState) + lastState = nextState + agent.reward = reward + if goal_achieved or (runtime >= self.max_steps): break + # stop trial if agent explitly failed early + elif reward <= self.killed_reward: + agent.sensoryState = nextState + agent.reward = reward + agent.plasticUpdate() # allow 1 more update to 'learn' the bad reward + agent.reset() + nextState = env.reset() + status = 'killed' + runtime = self.max_steps + break + # print(time, agent.action, agent.reward, status) + #print(" runtime", runtime) + #if goal_achieved: + # print(" Goal Achieved!!!") + return agent, runtime + + def choose_map(self, map_index=None): + """ + If map_index in [0..len(self.maps)], return that one. + Else return one randomly. 
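add_map() and choose_map() are what distinguish this multi-map variant from gwe.py: every add_map() call registers another GridWorld, run_trial() draws one at random when no environment is passed, and multi_evaluate() below averages performance across all of them. A short sketch mirroring holes_block_direct_route_two_goals() at the bottom of this file (it assumes multigwe.py's own imports):

```
# Sketch, assuming multigwe.py's own GoalsAndHolesWorld and Agent imports.
# Same grids and holes as holes_block_direct_route_two_goals(); only the goal
# moves (right-hand corner in the first map, left-hand corner in the second).
gahw = GoalsAndHolesWorld((4, 13), (4,), (3, 12), [[3, 3], [1, 11]], (2, 6),
                          Agent, max_steps=200)
gahw.add_map((4, 13), (4,), (3, 0), [[3, 3], [1, 11]], (2, 6))

print(len(gahw.maps))             # 2 registered maps
random_map = gahw.choose_map()    # random pick when no index is given
first_map = gahw.choose_map(0)    # or select one explicitly
```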
+ """ + # print("self.maps", self.maps) + + if map_index in [None]: + # Random choice of map from alternatives + if 1 == len(self.maps): # There can only be one + mymap = self.maps[0] + else: # Choose one of them + mymap = random.choice(self.maps) + elif 0 <= map_index and map_index < len(self.maps): + mymap = self.maps[map_index] + else: + mymap = random.choice(self.maps) + return mymap + + def evaluate(self, ind, return_agent=False): + """ + + """ + latest = 20 + # Pull weights from ind + + # Choose an environment + """ + if 1 == len(self.maps): + mymap = self.maps[0] + else: + mymap = random.choice(self.maps) + """ + + # New way + mymap = self.choose_map() + + myenv = mymap.env + + # Instantiate an Agent + myagent = Agent(obsSpace=mymap.obsSpace, actSpace=mymap.actSpace, alpha=self.alpha, gamma=self.gamma, epsilon=self.epsilon, lmbda=self.lmbda) + + # Should consider one round of single trial to get the performance due to + # inheritance, then proceed with full trials to 'develop' the agent, + # and get its trained performance. + + # Put weights in the Agent + myagent.weights = [x for x in ind] + #print(" myagent.weights", myagent.weights) + # run_trial calls + time_to_solve_each_trial = [] # lower is better + for trialN in range(self.max_training_trials): + # some output to see it running + # if (trialN % 10) == 0: print('.',end='') + myagent, runtime = self.run_trial(myagent, env=myenv) + # record trial results + time_to_solve_each_trial.append(runtime) + #print(" tts", time_to_solve_each_trial) + # calculate fitness + # Fitness is 1 - (avg. tts / max. time) + # w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial) / self.max_steps)) + ltts = len(time_to_solve_each_trial) + latest = ltts // 2 + # Latter half of steps + #w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial[-latest:]) / self.max_steps)) + # First half of steps + w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial[:-latest]) / self.max_steps)) + # return the fitness + #print(" fitness", "%3.2f" % w) + #print(" myagent.weights after", myagent.weights) + if return_agent: + return myagent, w, time_to_solve_each_trial + else: + return w, + + + def multi_evaluate(self, ind, return_agent=False): + """ + Like 'evaluate', but when multiple maps exist, evaluate per + each map, collect performance, and return fitness as the + mean performance across all maps. + + """ + latest = 20 + # Pull weights from ind + + # Info across all maps/environments + time_to_solve_each_trial = [] # lower is better + for mymap in self.maps: + myenv = mymap.env + # Instantiate an Agent + myagent = Agent(obsSpace=mymap.obsSpace, actSpace=mymap.actSpace, alpha=self.alpha, gamma=self.gamma, epsilon=self.epsilon, lmbda=self.lmbda) + # Put weights in the Agent + myagent.weights = [x for x in ind] + #print(" myagent.weights", myagent.weights) + # run_trial calls + for trialN in range(self.max_training_trials): + # some output to see it running + # if (trialN % 10) == 0: print('.',end='') + myagent, runtime = self.run_trial(myagent, env=myenv) + # record trial results + time_to_solve_each_trial.append(runtime) + + # calculate fitness + # Fitness is 1 - (avg. tts / max. 
time) + w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial) / self.max_steps)) + # return the fitness + if return_agent: + return myagent, w, time_to_solve_each_trial + else: + return w, + +class MaxAve(object): + def __init__(self, alpha=0.1): + self.alpha = alpha + pass + + def get_weights_len(self, wl=100): + return wl + + def evaluate(self, ind): + npwts = np.array([x for x in ind]) + wtmax = np.max(np.abs(npwts)) + wtmean = np.mean(np.abs(npwts)) + if 0.0 != wtmax: + w = wtmean / wtmax + else: + w = 0.0 + return w, + +class EvolveWeights(object): + """ + Class to apply DEAP to evolve a population consisting of a set + of weights. + """ + + def __init__(self, gahw, + popsize=100, maxgenerations=10000, + cxpb=0.5, mtpb=0.05, + wmin=-20.0, wmax=20.0, + mut_center=0.0, mut_sigma=0.1, mut_indpb=0.05, + tournsize=5, + tournk=2, + normalize_fitness=True, + tag='gahw' + ): + self.tag = tag + self.starttime = isotime() + self.logbase = tag + "_" + t2fn(self.starttime) + + self.gahw = gahw + self.weights_len = gahw.get_weights_len() + + self.popsize = popsize + self.maxgenerations = maxgenerations + self.cxpb = cxpb + self.mtpb = mtpb + self.wmin = wmin + self.wmax = wmax + self.mut_center = mut_center + self.mut_sigma = mut_sigma + self.mut_indpb = mut_indpb + self.tournsize = tournsize + self.tournk = tournk + self.normalize_fitness = normalize_fitness + pass + + def masv(self, pop): + mav = [] + maxs = [] + for ind in pop: + wts = [x for x in ind] + mav.append(np.mean(np.abs(wts))) + maxs.append(np.max(np.abs(wts))) + allmax = np.max(maxs) + mymasv = [x/allmax for x in mav] + return mymasv + + def cxTwoPointCopy(self, ind1, ind2): + """Execute a two points crossover with copy on the input individuals. The + copy is required because the slicing in numpy returns a view of the data, + which leads to a self overwriting in the swap operation. It prevents + :: + >>> import numpy as np + >>> a = np.array((1,2,3,4)) + >>> b = np.array((5,6,7,8)) + >>> a[1:3], b[1:3] = b[1:3], a[1:3] + >>> print(a) + [1 6 7 4] + >>> print(b) + [5 6 7 8] + """ + size = len(ind1) + cxpoint1 = random.randint(1, size) + cxpoint2 = random.randint(1, size - 1) + if cxpoint2 >= cxpoint1: + cxpoint2 += 1 + else: # Swap the two cx points + cxpoint1, cxpoint2 = cxpoint2, cxpoint1 + ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = ind2[cxpoint1:cxpoint2].copy(), ind1[cxpoint1:cxpoint2].copy() + return ind1, ind2 + + def zero(self): + return 0.0 + + def smallrandom(self, eps=None): + """ + Produce a small random number in [-eps .. eps]. + + A random variate in [-1 .. 1] is produced then + multiplied by eps, so the final range is in [-eps .. eps]. 
+ + """ + if eps in [None]: + eps = self.gahw.alpha + rv = ((2.0 * random.random()) - 1.0) * eps + return rv + + def setup(self): + creator.create("FitnessMax", base.Fitness, weights=(1.0,)) + creator.create("Individual", np.ndarray, fitness=creator.FitnessMax) + + self.toolbox = base.Toolbox() + + self.pool = multiprocessing.Pool() + self.toolbox.register("map", self.pool.map) + + #toolbox.register("attr_bool", random.randint, 0, 1) # non-numpy non-float version + # self.toolbox.register("attr_float", random.random) + #self.toolbox.register("attr_float", self.zero) + self.toolbox.register("attr_float", self.smallrandom) + + self.toolbox.register("individual", tools.initRepeat, creator.Individual, self.toolbox.attr_float, n=self.weights_len) + self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual) + + # self.toolbox.register("evaluate", self.gahw.evaluate) + self.toolbox.register("evaluate", self.gahw.multi_evaluate) + #toolbox.register("mate", tools.cxTwoPoint) # non-numpy non-float version + self.toolbox.register("mate", self.cxTwoPointCopy) + #toolbox.register("mutate", tools.mutFlipBit, indpb=0.05) # non-numpy non-float version + self.toolbox.register("mutate", tools.mutGaussian, mu=self.mut_center, sigma=self.mut_sigma, indpb=self.mut_indpb) + self.toolbox.register("select", tools.selTournament, tournsize=self.tournsize, k=self.tournk) + + def normalize_fitnesses(self, fitnesses): + #print("fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + maxfitness = np.max([x[0] for x in fitnesses]) + #print("maxfitness", maxfitness) + listfit = [x[0] for x in fitnesses] + #print("listfit", listfit) + normfit = [x/maxfitness for x in listfit] + #print("normfit", normfit) + fitnesses = [tuple([x]) for x in normfit] + #print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + return fitnesses + + def log_it(self, generation): + pool = self.pool + toolbox = self.toolbox + self.pool = None + self.toolbox = None + pklfn = f"{self.logbase}__{generation+1}-{self.maxgenerations}.pkl" + pickle.dump(self, open(pklfn, "wb")) + self.pool = pool + self.toolbox = toolbox + + def loop(self): + self.population = self.toolbox.population(n=self.popsize) + #print(self.masv(self.population)) + NGEN=self.maxgenerations + for gen in range(NGEN): + print("generation", gen) + offspring = algorithms.varAnd(self.population, self.toolbox, cxpb=self.cxpb, mutpb=self.mtpb) + # print("offspring", offspring) + # constrain genome values to [0,1] + for offspring_i,individual in enumerate(offspring): + np.clip(np.array(offspring[offspring_i]), self.wmin, self.wmax) + # print("clipped offspring", offspring) + # Evaluate the individuals with an invalid fitness (not yet evaluated) + # print("check fitness.valid") + invalid_ind = [ind for ind in offspring if not ind.fitness.valid] + # print("invalid_ind", len(invalid_ind)) + #print("setting fitness") + fitnesses = self.toolbox.map(self.toolbox.evaluate, invalid_ind) + if self.normalize_fitness: + fitnesses = self.normalize_fitnesses(fitnesses) + """ + #print("fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + maxfitness = np.max([x[0] for x in fitnesses]) + #print("maxfitness", maxfitness) + listfit = [x[0] for x in fitnesses] + #print("listfit", listfit) + normfit = [x/maxfitness for x in listfit] + #print("normfit", normfit) + fitnesses = [tuple([x]) for x in normfit] + #print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + """ + # print("fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + self.fitness_dist(fitnesses) + # print("update 
ind fitness") + for ind, fit in zip(invalid_ind, fitnesses): + ind.fitness.values = fit + #print("selection") + #print("offspring\n", self.masv(offspring)) + self.offspring = offspring + self.population = self.toolbox.select(offspring, k=len(self.population)) + if 0 == gen % 100: + self.log_it(gen) + + #print("population after selection\n", self.masv(self.population)) + #print("Report for generation", gen) + self.report() + + def report(self): + # post-evolution analysis + fitnesses = self.toolbox.map(self.toolbox.evaluate, self.population) + if self.normalize_fitness: + fitnesses = self.normalize_fitnesses(fitnesses) + self.fitnesses = fitnesses + self.sortedFitnesses = sorted(fitnesses) + self.sortedFitnesses.reverse() + self.fitness_dist(fitnesses) + + self.bestFitness, self.worstFitness = self.sortedFitnesses[0], self.sortedFitnesses[-1] + print("best/worst w", self.bestFitness, self.worstFitness) + + self.bestGenome = tools.selBest(self.population, k=1) + # print(self.bestGenome) + + def ffmt(self, value, fmt="%3.2f"): + return fmt % value + + def fitness_dist(self, fitnesses): + listfit = [x[0] for x in fitnesses] + pct05, pct25, pct50, pct75, pct95 = np.percentile(listfit, [0.05, 0.25, 0.5, 0.75, 0.95]) + print(f"fitness dist: {self.ffmt(np.min(listfit))} {self.ffmt(pct05)} {self.ffmt(pct25)} {self.ffmt(pct50)} {self.ffmt(pct75)} {self.ffmt(pct95)} {self.ffmt(np.max(listfit))}") + + def driver(self): + # Initialize + self.setup() + # Generation loop + self.loop() + # Report + self.report() + self.log_it(self.maxgenerations) + print(self.masv(self.population)) + pass + +def holes_block_direct_route(): + # GridWorld as in 'gridworld.ipynb' + gahw = GoalsAndHolesWorld((4,12), (4,), (3,11), [[3,3],[1,10]], (3,0), Agent, max_steps=200) + ew = EvolveWeights(gahw, popsize=100, maxgenerations=10000, tournsize=75, tournk=3, normalize_fitness=False) + ew.driver() + + +def holes_block_direct_route_two_goals(): + # GridWorld as in 'gridworld.ipynb' + gahw = GoalsAndHolesWorld((4,13), (4,), (3,12), [[3,3],[1,11]], (2,6), Agent, max_steps=200) + gahw.add_map((4,13), (4,), (3,0), [[3,3],[1,11]], (2,6)) + ew = EvolveWeights(gahw, popsize=100, maxgenerations=100, tournsize=75, tournk=3, normalize_fitness=False) + ew.driver() + + +def holes_block_direct_route_two_goals_left(): + # GridWorld as in 'gridworld.ipynb' + gahw = GoalsAndHolesWorld((4,13), (4,), (3,0), [[3,3],[1,11]], (2,6), Agent, max_steps=200) + gahw.add_map((4,13), (4,), (3,0), [[3,3],[1,11]], (2,6)) + ew = EvolveWeights(gahw, popsize=100, maxgenerations=100, tournsize=75, tournk=3, normalize_fitness=False) + ew.driver() + +def holes_block_direct_route_two_goals_right(): + # GridWorld as in 'gridworld.ipynb' + gahw = GoalsAndHolesWorld((4,13), (4,), (3,12), [[3,3],[1,11]], (2,6), Agent, max_steps=200) + gahw.add_map((4,13), (4,), (3,12), [[3,3],[1,11]], (2,6)) + ew = EvolveWeights(gahw, popsize=100, maxgenerations=100, tournsize=75, tournk=3, normalize_fitness=False) + ew.driver() + + + +def maxave(): + ma = MaxAve() + ew = EvolveWeights(ma, popsize = 100, maxgenerations=500) + ew.driver() + +if __name__ == "__main__": + + #holes_block_direct_route() + print("Two different goals") + holes_block_direct_route_two_goals() + print("Two environments, both have goal on left.") + holes_block_direct_route_two_goals_left() + print("Two environments, both have goal on right.") + holes_block_direct_route_two_goals_right() + + # maxave() + + pass diff --git a/code/pe.py b/code/pe.py new file mode 100755 index 0000000..dbe0122 --- /dev/null +++ 
b/code/pe.py @@ -0,0 +1,328 @@ +""" +pe.py + +puzzles evolving + +""" + + +import sys +# allow importing from the 'code/' dir +sys.path.append("../code") + +import os +import platform +import pickle +import json +import traceback +import datetime +import copy + +import numpy as np, itertools, copy +import matplotlib.pyplot as plt +from collections import defaultdict +import importlib # module reloading + +import environments +import agents + +# always forces a reload in case you have edited environments or agents +importlib.reload(environments) +importlib.reload(agents) +#from environments.gridworld import GridWorld +import environments.puzzle as pz +from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace + +from agents.q_agent import EvolvableAgent as Agent + +# DEAP imports + +import random +from deap import creator, base, tools, algorithms + +import multiprocessing + +#pool = multiprocessing.Pool() +#toolbox.register("map", pool.map) + +# Weight handling +from mda import MultiDimArray + +def isotime(): + return datetime.datetime.now().isoformat() + +def t2fn(timestamp): + timestamp = timestamp.replace('.','_') + timestamp = timestamp.replace(':','_') + return timestamp + +class Holder(object): + def __init__(self): + pass + + +class EvolveWeights(object): + """ + Class to apply DEAP to evolve a population consisting of a set + of weights. + """ + + def __init__(self, environ, + popsize=100, maxgenerations=10000, + cxpb=0.5, mtpb=0.05, + wmin=-20.0, wmax=20.0, + mut_center=0.0, mut_sigma=0.1, mut_indpb=0.05, + tournsize=5, + tournk=2, + normalize_fitness=True, + tag='environ' + ): + self.tag = tag + self.starttime = isotime() + self.logbase = tag + "_" + t2fn(self.starttime) + + self.environ = environ + self.weights_len = environ.get_weights_len() + + self.popsize = popsize + self.maxgenerations = maxgenerations + self.cxpb = cxpb + self.mtpb = mtpb + self.wmin = wmin + self.wmax = wmax + self.mut_center = mut_center + self.mut_sigma = mut_sigma + self.mut_indpb = mut_indpb + self.tournsize = tournsize + self.tournk = tournk + self.normalize_fitness = normalize_fitness + pass + + def masv(self, pop): + mav = [] + maxs = [] + for ind in pop: + wts = [x for x in ind] + mav.append(np.mean(np.abs(wts))) + maxs.append(np.max(np.abs(wts))) + allmax = np.max(maxs) + mymasv = [x/allmax for x in mav] + return mymasv + + def cxTwoPointCopy(self, ind1, ind2): + """Execute a two points crossover with copy on the input individuals. The + copy is required because the slicing in numpy returns a view of the data, + which leads to a self overwriting in the swap operation. It prevents + :: + >>> import numpy as np + >>> a = np.array((1,2,3,4)) + >>> b = np.array((5,6,7,8)) + >>> a[1:3], b[1:3] = b[1:3], a[1:3] + >>> print(a) + [1 6 7 4] + >>> print(b) + [5 6 7 8] + """ + size = len(ind1) + cxpoint1 = random.randint(1, size) + cxpoint2 = random.randint(1, size - 1) + if cxpoint2 >= cxpoint1: + cxpoint2 += 1 + else: # Swap the two cx points + cxpoint1, cxpoint2 = cxpoint2, cxpoint1 + ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = ind2[cxpoint1:cxpoint2].copy(), ind1[cxpoint1:cxpoint2].copy() + return ind1, ind2 + + def zero(self): + return 0.0 + + def smallrandom(self, eps=None): + """ + Produce a small random number in [-eps .. eps]. + + A random variate in [-1 .. 1] is produced then + multiplied by eps, so the final range is in [-eps .. eps]. 
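This EvolveWeights only needs an environ object exposing get_weights_len() and evaluate(ind); the run_experiment() docstring in experiment.py sketches exactly this wiring against a conveyor-belt world. A usage sketch under those assumptions, building the environment the way exp1.exp1_environment() does (get_weights_len() on ConvBelt is assumed here, since this class requires it):

```
# Sketch of driving pe.EvolveWeights with a conveyor-belt environment.
# Assumes exp1.py's imports (RESS, ConvBelt, Agent, ...) resolve and that the
# returned ConvBelt provides get_weights_len() as EvolveWeights requires.
from exp1 import exp1_environment

world = exp1_environment(num_puzzles_on_belt=6)   # ConvBelt with agentclass/alpha set

ew = EvolveWeights(world,                         # the EvolveWeights defined above
                   popsize=100,
                   maxgenerations=1000,
                   tournsize=75,
                   tournk=3,
                   normalize_fitness=False)
ew.driver()                                       # setup -> generation loop -> report
```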
+ + """ + if eps in [None]: + eps = self.environ.alpha + rv = ((2.0 * random.random()) - 1.0) * eps + return rv + + def setup(self): + creator.create("FitnessMax", base.Fitness, weights=(1.0,)) + creator.create("Individual", np.ndarray, fitness=creator.FitnessMax) + + self.toolbox = base.Toolbox() + + self.pool = multiprocessing.Pool() + self.toolbox.register("map", self.pool.map) + + #toolbox.register("attr_bool", random.randint, 0, 1) # non-numpy non-float version + # self.toolbox.register("attr_float", random.random) + #self.toolbox.register("attr_float", self.zero) + self.toolbox.register("attr_float", self.smallrandom) + + self.toolbox.register("individual", tools.initRepeat, creator.Individual, self.toolbox.attr_float, n=self.weights_len) + self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual) + + # self.toolbox.register("evaluate", self.environ.evaluate) + self.toolbox.register("evaluate", self.environ.evaluate) + #toolbox.register("mate", tools.cxTwoPoint) # non-numpy non-float version + self.toolbox.register("mate", self.cxTwoPointCopy) + #toolbox.register("mutate", tools.mutFlipBit, indpb=0.05) # non-numpy non-float version + self.toolbox.register("mutate", tools.mutGaussian, mu=self.mut_center, sigma=self.mut_sigma, indpb=self.mut_indpb) + self.toolbox.register("select", tools.selTournament, tournsize=self.tournsize, k=self.tournk) + + def normalize_fitnesses(self, fitnesses): + #print("fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + maxfitness = np.max([x[0] for x in fitnesses]) + #print("maxfitness", maxfitness) + listfit = [x[0] for x in fitnesses] + #print("listfit", listfit) + normfit = [x/maxfitness for x in listfit] + #print("normfit", normfit) + fitnesses = [tuple([x]) for x in normfit] + #print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + return fitnesses + + def log_it(self, generation): + pool = self.pool + toolbox = self.toolbox + self.pool = None + self.toolbox = None + pklfn = f"{self.logbase}__{generation+1}-{self.maxgenerations}.pkl" + pickle.dump(self, open(pklfn, "wb")) + self.pool = pool + self.toolbox = toolbox + + def loop(self): + self.population = self.toolbox.population(n=self.popsize) + #print(self.masv(self.population)) + NGEN=self.maxgenerations + for gen in range(NGEN): + print("generation", gen) + offspring = algorithms.varAnd(self.population, self.toolbox, cxpb=self.cxpb, mutpb=self.mtpb) + # print("offspring", offspring) + # constrain genome values to [0,1] + for offspring_i,individual in enumerate(offspring): + np.clip(np.array(offspring[offspring_i]), self.wmin, self.wmax) + # print("clipped offspring", offspring) + # Evaluate the individuals with an invalid fitness (not yet evaluated) + # print("check fitness.valid") + invalid_ind = [ind for ind in offspring if not ind.fitness.valid] + # print("invalid_ind", len(invalid_ind)) + #print("setting fitness") + fitnesses = self.toolbox.map(self.toolbox.evaluate, invalid_ind) + if self.normalize_fitness: + fitnesses = self.normalize_fitnesses(fitnesses) + """ + #print("fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + maxfitness = np.max([x[0] for x in fitnesses]) + #print("maxfitness", maxfitness) + listfit = [x[0] for x in fitnesses] + #print("listfit", listfit) + normfit = [x/maxfitness for x in listfit] + #print("normfit", normfit) + fitnesses = [tuple([x]) for x in normfit] + #print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + """ + print("fitnesses", ["%3.2f" % x[0] for x in fitnesses]) + self.fitness_dist(fitnesses) + # print("update 
ind fitness") + for ind, fit in zip(invalid_ind, fitnesses): + ind.fitness.values = fit + #print("selection") + #print("offspring\n", self.masv(offspring)) + self.offspring = offspring + self.population = self.toolbox.select(offspring, k=len(self.population)) + if 0 == gen % 100: + self.log_it(gen) + + #print("population after selection\n", self.masv(self.population)) + #print("Report for generation", gen) + self.report() + + def report(self): + # post-evolution analysis + fitnesses = self.toolbox.map(self.toolbox.evaluate, self.population) + if self.normalize_fitness: + fitnesses = self.normalize_fitnesses(fitnesses) + self.fitnesses = fitnesses + self.sortedFitnesses = sorted(fitnesses) + self.sortedFitnesses.reverse() + self.fitness_dist(fitnesses) + + self.bestFitness, self.worstFitness = self.sortedFitnesses[0], self.sortedFitnesses[-1] + print("best/worst w", self.bestFitness, self.worstFitness) + + self.bestGenome = tools.selBest(self.population, k=1) + # print(self.bestGenome) + + def ffmt(self, value, fmt="%3.2f"): + return fmt % value + + def fitness_dist(self, fitnesses): + listfit = [x[0] for x in fitnesses] + pct05, pct25, pct50, pct75, pct95 = np.percentile(listfit, [0.05, 0.25, 0.5, 0.75, 0.95]) + print(f"fitness dist: {self.ffmt(np.min(listfit))} {self.ffmt(pct05)} {self.ffmt(pct25)} {self.ffmt(pct50)} {self.ffmt(pct75)} {self.ffmt(pct95)} {self.ffmt(np.max(listfit))}") + + def driver(self): + # Initialize + self.setup() + # Generation loop + self.loop() + # Report + self.report() + self.log_it(self.maxgenerations) + print(self.masv(self.population)) + pass + + +def puzzles_exp_1(): + '''full test of the conveyorbelt world + + >>> import copy + >>> maxrewards = [1] + >>> easy_features = [[0,1],[0,1],[3,1],[0,0]] + >>> easy_rewards = [-1,-1,-1,1] + >>> easy_tt = np.array([[0,0,2,3], [0,0,0,0], [2,0,2,3], [3,3,3,3]]) + >>> p1 = Puzzle(tt=easy_tt, features=easy_features, rewards=easy_rewards) + >>> p2 = copy.deepcopy(p1) + >>> puzzles = (p1,p2) + >>> world = ConvBelt(actionSpace = getActionSpace(puzzles), observationSpace = getObservationSpace(puzzles), maxRewards = maxrewards, randomize = False) + >>> world.append(p1) + >>> world.append(p2) +: +''' + maxrewards = [1] + easy_features = [[0,1],[0,1],[3,1],[0,0]] + easy_rewards = [-1,-1,-1,1] + easy_tt = np.array([[0,0,2,3], [0,0,0,0], [2,0,2,3], [3,3,3,3]]) + p1 = Puzzle(tt=easy_tt, features=easy_features, rewards=easy_rewards) + p2 = copy.deepcopy(p1) + puzzles = (p1, p2) + world = ConvBelt(actionSpace = getActionSpace(puzzles), + observationSpace = getObservationSpace(puzzles), + maxRewards = maxrewards, + agentclass=Agent, + randomize = False, alpha=0.005) + world.append(p1) + world.append(p2) + + environ = Holder() + environ.world = world + + + ew = EvolveWeights(world, popsize=100, maxgenerations=1000, tournsize=75, tournk=3, normalize_fitness=False) + ew.driver() + + + +if __name__ == "__main__": + print("pe.py start...") + + + puzzles_exp_1() + + print("pe.py done.") diff --git a/code/ress.py b/code/ress.py new file mode 100644 index 0000000..4d03b1d --- /dev/null +++ b/code/ress.py @@ -0,0 +1,254 @@ +"""RESS.py + +Random Equal Stimulus Sets + +Originally coded in Object Pascal for Delphi by Wesley R. Elsberry +around 1999. + +Translation to Python 3 by ChatGPT (GPT-4) 2023-06-01. + +Random Equal Stimulus Sets are sequences of numbers indicating one of +a set of stimuli to be presented to a subject in a cognitive or +psychophysics task. 
The basic rules for generating these sequences is +derived from Gellermann 1925(?), but modified to permit the +specification of more than two stimuli in the set. The restriction on +a maximum of three sequential presentations of the same stimulus is +retained. + +Issues: + The 'next_yield' method does not work. + Using 'next' for a sequence longer than the defined length of + sequence can cause there to be sequences that violate Gellermann's + assumptions, as the sequences composed together are not tested + across the joins. + +""" + +import sys +import os +import traceback + +import random + +MAXRESS = 120 # Arbitrary maximum + +class RESS: + """ + RESS class represents the equivalent of the Pascal unit 'ress' in Python. + +Random Equal Stimulus Sets are sequences of numbers indicating one of +a set of stimuli to be presented to a subject in a cognitive or +psychophysics task. The basic rules for generating these sequences is +derived from Gellermann 1925(?), but modified to permit the +specification of more than two stimuli in the set. The restriction on +a maximum of three sequential presentations of the same stimulus is +retained. + + """ + + def __init__(self): + self.classes = None + self.thelength = None + self.series = [0] * MAXRESS + self.lastseries = [0] * MAXRESS + self.cnt = None + self.seriesstr = "" + self.current = None + self.dummy = None + self.hist = [0] * 61 + + def init(self): + """ + Initializes the variables in TRESS. + """ + self.classes = 1 + self.thelength = 0 + self.series = [0] * MAXRESS + self.lastseries = [0] * MAXRESS + self.hist = [0] * 61 + self.cnt = 0 + self.seriesstr = "" + self.dummy = 0 + + def makestring(self): + """ + Creates a string representation of the series. + Returns: + The string representation of the series. + """ + tstr = "" + for val in self.series[1:self.thelength + 1]: + tstr += str(val) + self.seriesstr = tstr + return tstr + + def generate(self, len, nclass): + """ + Generates a candidate series. + Args: + len: The length of the series. + nclass: The number of classes. + """ + self.cnt = 0 + self.classes = nclass + + # Constraint: sequence length less than maximum + if MAXRESS >= len: + self.thelength = len + else: + self.thelength = MAXRESS + + # Constraint: Multiple of number of classes + if self.thelength % self.classes != 0: + self.thelength -= self.thelength % self.classes + + for i in range(self.classes): + self.hist[i] = self.thelength // self.classes + + self.series[0] = random.randint(0, self.classes - 1) + self.hist[self.series[0]] -= 1 + + run = 1 + for i in range(1, self.thelength): + ctr = 0 + while True: + ctr += 1 + jj = random.randint(0, self.classes - 1) + if self.hist[jj] > 0: + shortrun = (self.series[i - 1] == jj and run < 3) or (self.series[i - 1] != jj) + break + if ctr > 100: + break + if self.series[i - 1] == jj: + run += 1 + else: + run = 1 + self.hist[jj] -= 1 + self.series[i] = jj + + def test(self): + """ + Tests candidates for criteria. + Returns: + True if the series is valid, False otherwise. + """ + ok = True + hist = [0] * 61 + + for val in self.series[:self.thelength]: + hist[val] += 1 + + for i in range(self.classes - 1): + if hist[i] != hist[i + 1]: + ok = False + + if ok: + run = 1 + for i in range(1, self.thelength): + if self.series[i - 1] == self.series[i]: + run += 1 + if run > 3: + ok = False + else: + run = 1 + + return ok + + def newress(self, nlen=24, nclass=2): + """ + Finds and saves a valid series using generate and test. + Args: + nlen: The length of the series. 
+ nclass: The number of classes. + """ + print('nlen', nlen, 'nclass', nclass) + try: + + random.seed() + + self.lastseries = self.series + + while True: + self.generate(nlen, nclass) + # print("gen", self.makestring()) + if self.test(): + break + return self.makestring() + except: + estr = f"Error: {traceback.format_exc()}" + print(estr) + return '' + + def next(self): + """ + Returns the next value within a series. + Returns: + The next value in the series. + """ + if self.cnt >= self.thelength: + self.newress(self.thelength, self.classes) + + self.cnt += 1 + self.current = self.series[self.cnt] + return self.series[self.cnt] + + def next_yield(self): + """ + Yields the next value within a series. + """ + print('start', self.series, self.cnt, self.series[self.cnt]) + + while True: + if self.cnt >= self.thelength: + print("calling newress") + self.newress(self.thelength, self.classes) + self.cnt = 0 + + print(self.cnt) + print(self.series, self.cnt, self.series[self.cnt]) + self.current = self.series[self.cnt] + yield str(self.current) + self.cnt += 1 + +# Exercise the TRESS code + +from random import seed + +def main(): + # Set the seed for random number generation + seed() + + # Create an instance of the TRESS class + ress1 = RESS() + + # Initialize the TRESS object + ress1.init() + + # Generate and print a valid series + ress1.newress(24, 3) + series = ress1.makestring() + print("Generated Series:", series) + + ress1.newress(24, 3) + series = ress1.makestring() + print("Generated Series:", series) + + ress1.newress(24, 3) + series = ress1.makestring() + print("Generated Series:", series) + + ress1.newress(24, 3) + series = ress1.makestring() + print("Generated Series:", series) + + ress1.newress(24, 3) + series = ress1.makestring() + print("Generated Series:", series) + + # Generate and print the next value in the series + for ii in range(26): + next_val = ress1.next() + print(ii, "Next Value:", str(next_val)) + +if __name__ == "__main__": + main() diff --git a/deactivate_env.sh b/deactivate_env.sh new file mode 100644 index 0000000..ad6f22a --- /dev/null +++ b/deactivate_env.sh @@ -0,0 +1 @@ +micromamba deactivate diff --git a/jupyter.sh b/jupyter.sh new file mode 100644 index 0000000..e184b34 --- /dev/null +++ b/jupyter.sh @@ -0,0 +1,8 @@ +UMAMBA_PATH="umamba_env" +if [ ! -d "$UMAMBA_PATH" ]; then + echo "no $UMAMBA_PATH found" + . ./update_env.sh +fi +. 
./activate_env.sh +micromamba activate curio +jupyter-lab diff --git a/notebooks/gridworld.ipynb b/notebooks/gridworld.ipynb new file mode 100644 index 0000000..91a397e --- /dev/null +++ b/notebooks/gridworld.ipynb @@ -0,0 +1,138 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "bf316089-5339-4ac8-b0e2-3618fe06a593", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np, itertools, copy\n", + "import matplotlib.pyplot as plt\n", + "from collections import defaultdict\n", + "import importlib # module reloading\n", + "\n", + "# allow importing from the 'code/' dir\n", + "import sys\n", + "sys.path.append(\"../code\")\n", + "\n", + "import environments\n", + "import agents\n", + "# always forces a reload in case you have edited environments or agents\n", + "importlib.reload(environments)\n", + "importlib.reload(agents)\n", + "from environments.gridworld import GridWorld\n", + "from agents.q_agent import Agent\n", + "\n", + "# problem domain dependent settings\n", + "dims = [4,12]\n", + "obsSpace, actSpace = (dims[0], dims[1]), (4,)\n", + "num_trials=1000\n", + "n_actions = 4\n", + "#(optimal lmbda in the agent is domain dependent - could be evolved)\n", + "HARD_TIME_LIMIT = 50\n", + "KILLED_REWARD = -10\n", + "#(standard reward) = -1.0 (means agent is potentially wasting time - set internal to agent code)\n", + "#(goal reward) = 1.0 (means the agent achieved something good - set internal to agent code)\n", + "\n", + "# create our own GridWorld that adheres to openAI-gym environment API during training\n", + "env = GridWorld(dims = dims, startState = [3,0])\n", + "\n", + "# 4rows x 12columns (0,0) is top-left\n", + "# -: empty location\n", + "# S: Start location\n", + "# G: Goal location\n", + "# x: immediate fail (a hole / cliff)\n", + "#\n", + "# (map of grid world)\n", + "# ------------\n", + "# ------------\n", + "# ------------\n", + "# SxxxxxxxxxxG\n", + "\n", + "# add goals and holes\n", + "# supports multiple goals, use 1 for now\n", + "env.goals.append([3,11])\n", + "# support multiple 'kill zones' (cliff edge, in openAI parlance)\n", + "for i in range(1,11):\n", + " env.holes.append([3,i])\n", + " \n", + "agent = Agent(obsSpace=obsSpace, actSpace=actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.42)\n", + "# alpha # how much to weigh reward surprises that deviate from expectation\n", + "# gamma # how important exepcted rewards will be\n", + "# epsilon # fraction of exploration to exploitation (how often to choose a random action)\n", + "# lmbda # how slowly memory of preceeding actions fades away (1=never, 0=\n", + "\n", + "\n", + "time_to_solve_each_trial = [] # lower is better\n", + "for trialN in range(num_trials):\n", + " # some output to see it running\n", + " if (trialN % 10) == 0: print('.',end='')\n", + " # initialize the agent, environment, and time for this trial\n", + " agent.reset() # soft-reset() (keeps learned weights)\n", + " nextState = env.reset()\n", + " time = 0\n", + " while True:\n", + " time += 1\n", + " # set agent senses based on environment and allow agent to determine an action\n", + " agent.sensoryState = nextState\n", + " agent.plasticUpdate()\n", + " # determine effect on environment state & any reward (in standard openAI-gym API format)\n", + " nextState, reward, goal_achieved, _ = env.step(agent.action)\n", + " agent.reward = reward\n", + " if goal_achieved or time == HARD_TIME_LIMIT: break\n", + " # stop trial if agent explitly failed early\n", + " elif reward <= KILLED_REWARD:\n", + " 
agent.sensoryState = nextState\n", + " agent.reward = reward\n", + " agent.plasticUpdate() # allow 1 more update to 'learn' the bad reward\n", + " agent.reset()\n", + " nextState = env.reset()\n", + " # record trial results\n", + " time_to_solve_each_trial.append(time)\n", + " \n", + "print()\n", + "plt.plot(time_to_solve_each_trial);\n", + "pt=15 # font point\n", + "plt.title('Time until agent solved trial', fontsize=pt)\n", + "plt.xlabel('Trial', fontsize=pt)\n", + "plt.ylabel('Time', fontsize=pt)\n", + "\n", + "# show path agent took in GridWorld using non-learning agent (staticUpdate())\n", + "print(\"green dot: start location\")\n", + "print(\"red dot: finish location\")\n", + "env.render(agent)\n", + "#render(agent,env)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d54a622f-42e4-4384-bf9a-0f0181301c3c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/puzzlev0.ipynb b/notebooks/puzzlev0.ipynb new file mode 100644 index 0000000..33b159e --- /dev/null +++ b/notebooks/puzzlev0.ipynb @@ -0,0 +1,162 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "b067867a-c1bc-4769-a6ac-15e7277ab8e2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import numpy as np, itertools, copy\n", + "import matplotlib.pyplot as plt\n", + "from collections import defaultdict\n", + "import importlib # module reloading\n", + "\n", + "# allow importing from the 'code/' dir\n", + "import sys\n", + "sys.path.append(\"../code\")\n", + "\n", + "import environments\n", + "import agents\n", + "# always forces a reload in case you have edited environments or agents\n", + "importlib.reload(environments)\n", + "importlib.reload(agents)\n", + "from environments.puzzle import Puzzle, ConvBelt, Action, getActionSpace, getObservationSpace\n", + "from agents.q_agent import Agent\n", + "\n", + "import copy # allows duplicating puzzles into unique puzzles, otherwise python refs are shallow-copied\n", + "maxrewards = [1] # could have multiple levels of 'goodness'\n", + "\n", + "# Create a puzzle with 4 states:\n", + "# state 0: first presentation\n", + "# state 1: getting passed over, advancing on belt (not really a state, more a placeholder)\n", + "# state 2: investigated (more sensory information is available when examined closely)\n", + "# state 3: consumed (saturating state with possible reward)\n", + "easy_puzzle_tt = np.array([[0,0,2,3], # state 0: first presentation\n", + " [0,0,0,0], # state 1: getting passed over (placeholder)\n", + " [2,0,2,3], # state 2: investigated\n", + " [3,3,3,3]]) # state 3: consumed\n", + "# example puzzle with 2 sensorial dimensions\n", + "easy_puzzle_features = [[0,1], # state 0: Empty/Unknown & Spikes\n", + " [0,1], # state 1: Empty/Unknown & Spikes\n", + " [3,1], # state 2: Red & Spikes\n", + " [0,0]] # state 3: Empty/Unknown & Empty/Unknown\n", + "easy_puzzle_rewards = [-1, # state 0: first look\n", + " -1, # state 1: proceeding to next puzzle (placeholder)\n", + " -1, # state 2: investigate\n", + " 1] # state 3: consume (could be -10 poisonous! 
or -1 empty/useless)\n", + "p1 = Puzzle(tt = easy_puzzle_tt,\n", + " features = easy_puzzle_features,\n", + " rewards = easy_puzzle_rewards)\n", + "p2 = copy.deepcopy(p1)\n", + "puzzles = (p1,p2)\n", + "\n", + "\n", + "obsSpace = getObservationSpace(puzzles)\n", + "actSpace = getActionSpace(puzzles)\n", + "\n", + "\n", + "env = ConvBelt(actionSpace = getActionSpace(puzzles), # indicate number of actions agent can take\n", + " observationSpace = getObservationSpace(puzzles), # indicate number of sensorial dimensions and sizes\n", + " maxRewards = maxrewards, # rewards that constitute postive rewards\n", + " randomize = False, # randomize puzzle positions on belt at each reset()\n", + " )\n", + "\n", + "# can use append() or extend()\n", + "env.append(p1)\n", + "env.append(p2)\n", + "\n", + "# domain-specific settings\n", + "num_trials=200\n", + "n_actions = 4\n", + "#(optimal lmbda in the agent is domain dependent - could be evolved)\n", + "HARD_TIME_LIMIT = 600\n", + "#KILLED_REWARD = -10 # not used here\n", + "#(standard reward) = -1.0 (means agent is potentially wasting time - set internal to agent code)\n", + "#(goal reward) = 1.0 (means the agent achieved something good - set internal to agent code)\n", + "\n", + "agent = Agent(obsSpace=obsSpace, actSpace=actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.42)\n", + "# alpha # how much to weigh reward surprises that deviate from expectation\n", + "# gamma # how important exepcted rewards will be\n", + "# epsilon # fraction of exploration to exploitation (how often to choose a random action)\n", + "# lmbda # how slowly memory of preceeding actions fades away (1=never, 0=\n", + "\n", + "time_to_solve_each_trial = []\n", + "rewards = []\n", + "\n", + "for trialN in range(num_trials):\n", + " # some output to see it running\n", + " if (trialN % 10) == 0: print('.',end='')\n", + " # initialize the agent, environment, and time for this trial\n", + " agent.reset() # soft-reset() (keeps learned weights)\n", + " nextState = env.reset()\n", + " time = 0\n", + " while True:\n", + " time += 1\n", + " # set agent senses based on environment and allow agent to determine an action\n", + " agent.sensoryState = nextState\n", + " agent.plasticUpdate()\n", + " # determine effect on environment state & any reward (in standard openAI-gym API format)\n", + " nextState, reward, goal_achieved, _ = env.step(agent.action)\n", + " agent.reward = reward\n", + " if env.puzzlesLeftToComplete == 0 or time == HARD_TIME_LIMIT:\n", + " agent.plasticUpdate()\n", + " break\n", + " # could have deadly rewards that stop the trial early\n", + " #elif reward <= -10:\n", + " # agent.sensoryState = nextState\n", + " # agent.reward = reward\n", + " # agent.plasticUpdate()\n", + " # agent.reset()\n", + " # nextState = env.reset()\n", + " rewards.append(reward)\n", + " time_to_solve_each_trial.append(time)\n", + " \n", + " \n", + "print()\n", + "print(list(agent.weights.round(3)))\n", + "#print(agent.timeSinceBigSurprise)\n", + "plt.figure(figsize=(16,4),dpi=200)\n", + "plt.plot(time_to_solve_each_trial)\n", + "pt=15 # font point\n", + "plt.title('Time until agent solved trial (puzzle boxes)', fontsize=pt)\n", + "plt.xlabel('Trial', fontsize=pt)\n", + "plt.ylabel('Time', fontsize=pt)\n", + "#figure()\n", + "#plot(rewards)\n", + "env.render(agent);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e22a5e6-47fb-45c0-905f-3fb5b6cc3980", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 
(ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/papers/bibliography.bib b/papers/bibliography.bib new file mode 100644 index 0000000..3391443 --- /dev/null +++ b/papers/bibliography.bib @@ -0,0 +1,40 @@ +% uses machine learning to facilitate automatic olfactory classification. +% Intro discusses how smells are smelled. +% PDF: https://arxiv.org/pdf/1906.07067 +@article{imam2020rapid, + title={Rapid online learning and robust recall in a neuromorphic olfactory circuit}, + author={Imam, Nabil and Cleland, Thomas A}, + journal={Nature Machine Intelligence}, + volume={2}, + number={3}, + pages={181--191}, + year={2020}, + publisher={Nature Publishing Group} +} + +% PDF: https://search.proquest.com/docview/1297102848?pq-origsite=gscholar&imgSeq=1 +@article{gellermann1933chance, + title={Chance orders of alternating stimuli in visual discrimination experiments}, + author={Gellermann, Louis W}, + journal={The journal of genetic psychology}, + volume={42}, + pages={206--208}, + year={1933}, + publisher={Journal Press, etc.} +} + +% PDF: https://static1.squarespace.com/static/5b82081250a54f02ee0758c8/t/5b8ed5a04fa51a484aa907ee/1536087459872/tinbergen+original.pdf +% Also uploaded to repository. +@article{Tinbergen1963Jan, + author = {Tinbergen, N.}, + title = {{On aims and methods of Ethology}}, + journal = {Z. Tierpsychol.}, + volume = {20}, + number = {4}, + pages = {410--433}, + year = {1963}, + month = {Jan}, + issn = {0044-3573}, + publisher = {John Wiley {\&} Sons, Ltd}, + doi = {10.1111/j.1439-0310.1963.tb01161.x} +} diff --git a/papers/narrative.pdf b/papers/narrative.pdf new file mode 100644 index 0000000..49c1add Binary files /dev/null and b/papers/narrative.pdf differ diff --git a/papers/week_02_tinbergen_on_aims_and_methods_of_ethology_zft_1963.pdf b/papers/week_02_tinbergen_on_aims_and_methods_of_ethology_zft_1963.pdf new file mode 100644 index 0000000..7dfc632 Binary files /dev/null and b/papers/week_02_tinbergen_on_aims_and_methods_of_ethology_zft_1963.pdf differ diff --git a/requirements-conda.txt b/requirements-conda.txt new file mode 100644 index 0000000..7975c34 --- /dev/null +++ b/requirements-conda.txt @@ -0,0 +1,7 @@ +python=3.11 +jupyter +numpy +matplotlib +plotnine +nodejs +deap diff --git a/requirements-pip.txt b/requirements-pip.txt new file mode 100644 index 0000000..c9356a7 --- /dev/null +++ b/requirements-pip.txt @@ -0,0 +1 @@ +jupyterlab diff --git a/update_env.sh b/update_env.sh new file mode 100644 index 0000000..9066101 --- /dev/null +++ b/update_env.sh @@ -0,0 +1,47 @@ + +OS="linux" +if [[ "$OSTYPE" == "darwin"* ]]; then + OS="osx" +fi + +ARCH="64" +if [[ "$(uname -m)" == "aarch64" ]]; then + if [[ "$OS" == "osx" ]]; then + ARCH="arm64" + else + ARCH="aarch64" + fi +fi + +SYSTEM="$OS-$ARCH" + +# conda deactivate in case they have a conda env +# micromamba deactivate in case they have a micromamba env +conda deactivate &>/dev/null +micromamba deactivate &>/dev/null + +UMAMBA_PATH="umamba_env" +if [ ! -d "umamba_env" ]; then + # download micromamba + echo "downloading micromamba to $UMAMBA_PATH/ ..." 
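+    # note: the micro.mamba.pm URL serves a bzip2 tarball; 'tar -xvj bin/micromamba'
+    # extracts only the micromamba binary into ./bin, which the next line renames
+    # to $UMAMBA_PATH so the project keeps a self-contained micromamba root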
+    curl -Ls https://micro.mamba.pm/api/micromamba/${SYSTEM}/latest | tar -xvj bin/micromamba
+    mv bin $UMAMBA_PATH
+    # activate micromamba
+    export MAMBA_ROOT_PREFIX=$PWD/$UMAMBA_PATH
+    eval "$(./$UMAMBA_PATH/micromamba shell hook -s posix)"
+    # create the project environment
+    echo "creating 'curio' environment"
+    micromamba create -n curio -c conda-forge
+    micromamba activate curio
+else
+    echo "found micromamba at $UMAMBA_PATH"
+    # set up the shell hook before activating the existing environment
+    export MAMBA_ROOT_PREFIX=$PWD/$UMAMBA_PATH
+    eval "$(./$UMAMBA_PATH/micromamba shell hook -s posix)"
+    micromamba activate curio
+fi
+echo "installing packages"
+# install conda requirements
+micromamba install --yes $(tr '\n' ' ' < requirements-conda.txt) -c conda-forge
+# install pip requirements
+pip install --no-input -r requirements-pip.txt
+micromamba deactivate
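A trailing note on the per-generation fitness summaries printed by `fitness_dist()` in `code/evolve.py` and `code/pe.py`: `np.percentile` takes its `q` argument in percent (0-100), so the 5th/25th/50th/75th/95th percentiles are requested as `[5, 25, 50, 75, 95]`. The sketch below is a minimal, self-contained version of that summary line using made-up fitness tuples (it is an illustration, not part of the repository code).

```python
# Minimal sketch of the fitness_dist() summary (made-up fitness values).
# np.percentile's q is in percent, so 5 requests the 5th percentile.
import numpy as np

fitnesses = [(0.2,), (0.9,), (0.5,), (1.3,), (0.7,)]  # hypothetical (fitness,) tuples
listfit = [f[0] for f in fitnesses]
pct05, pct25, pct50, pct75, pct95 = np.percentile(listfit, [5, 25, 50, 75, 95])
print("fitness dist:",
      " ".join("%3.2f" % v
               for v in (np.min(listfit), pct05, pct25, pct50, pct75, pct95, np.max(listfit))))
```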