Copied Jory's repo in

parent 1c295d9c40
commit 67b9c88cba

README.md | 27

@@ -1,3 +1,26 @@
# alice
# curiosity

### quickstart

* `. ./jupyter.sh` runs jupyter-lab (setting everything up if necessary)

Note: run scripts using `source <scriptname>` or `. ./<scriptname>` so they can modify your current shell environment.

### helper scripts

* `. ./update_env.sh` creates or updates the project python environment
* `. ./activate_env.sh` activates the project environment (calling update if missing)
* `. ./deactivate_env.sh` deactivates the project environment
* `. ./jupyter.sh` runs jupyter-lab (calling activate for safety)
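A typical session might look like the sketch below (assuming you are in the repo root where these scripts live):

```
# set up (or update) the environment, activate it, and launch jupyter-lab
. ./update_env.sh
. ./activate_env.sh
. ./jupyter.sh

# when finished
. ./deactivate_env.sh
```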
### structure

```
├── code
│   ├── agents/                # agent algorithms
│   ├── environments/          # test environments
│   └── evolve.py              # sample evolution code
├── notebooks/                 # example notebooks
├── papers/                    # useful shared docs
├── requirements-conda.txt     # conda project dependencies
├── requirements-pip.txt       # pip project dependencies (sometimes necessary)
```

ALICE is a project to explore curiosity in a model incorporating both reinforcement learning and evolutionary processes.
@@ -0,0 +1,13 @@

# conda deactivate in case they have a conda env
# micromamba deactivate in case they have a micromamba env
conda deactivate &> /dev/null
micromamba deactivate &> /dev/null

UMAMBA_PATH="umamba_env"
if [ ! -d "$UMAMBA_PATH" ]; then
    echo "no $UMAMBA_PATH found"
    . ./update_env.sh
fi
export MAMBA_ROOT_PREFIX=$PWD/$UMAMBA_PATH
eval "$(./$UMAMBA_PATH/micromamba shell hook -s posix)"
micromamba activate curio
@@ -0,0 +1,365 @@

"""
q_agent.py

This submodule contains the Agent class, which implements a Q-learning agent with eligibility traces (TD-lambda). The agent learns to make decisions based on its sensory state and rewards received from the environment. The agent uses an epsilon-greedy action-selection strategy.

Usage:
    import q_agent

Class:
    Agent(obsSpace, actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.96)

Attributes:
    obsSpace (tuple): The shape of the observation space.
    actSpace (tuple): The shape of the action space.
    ftrSpace (tuple): The shape of the feature space.
    n_features (int): The total number of features.
    n_actions (int): The total number of actions.
    weights (numpy.ndarray): The Q-function weights.
    trace (numpy.ndarray): The eligibility trace for each feature.
    featureToIndexMap (numpy.ndarray): A mapping from feature indices to the corresponding weights.
    allActions (list): A list of all possible actions.
    alpha (float): The learning rate for updating weights.
    gamma (float): The discount factor for future rewards.
    epsilon (float): The exploration rate for epsilon-greedy action selection.
    lmbda (float): The decay factor for eligibility traces.
    sensoryState (numpy.ndarray): The current sensory state of the agent.
    previousSensoryState (numpy.ndarray): The previous sensory state of the agent.
    action (int): The current action taken by the agent.
    previousAction (int): The previous action taken by the agent.
    episoden (int): The episode number the agent is in.
    recentReset (bool): Indicates if the agent was recently reset.

Methods:
    reset():
        Resets the agent's traces, sensory states, and actions.

    predictPayoffsForAllActions() -> List[float]:
        Predicts the expected payoffs for all possible actions given the current sensory state.

    plasticUpdate():
        Updates the agent's Q-function weights and eligibility traces based on the current sensory state, action, and received reward. Uses epsilon-greedy action selection.

    staticUpdate():
        Updates the agent's action based on the current sensory state without updating weights or traces. Uses greedy action selection.

Examples:
    >>> from q_agent import Agent
    >>> obsSpace, actSpace = (2, 2), (3,)
    >>> agent = Agent(obsSpace=obsSpace, actSpace=actSpace)
"""

import traceback

import numpy as np
from collections import defaultdict
from typing import List, Tuple, Union

from deap import creator, base, tools, algorithms

LOGGING = False

import logging, sys
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
log = logging.getLogger()

if not LOGGING:
    # remove all logging functionality
    for handler in log.handlers.copy():
        try:
            log.removeHandler(handler)
        except ValueError:  # in case another thread has already removed it
            pass
    log.addHandler(logging.NullHandler())
    log.propagate = False


# The Agent class, similar to what is used in MABE.
# Note: this is unlike how standard RL/ML folks structure these
# algorithms. Here, we separate out concerns for modularity.
# A side-effect is that update() (one cognitive step) receives the
# reward for the previous update's action. This means 1 extra
# update must be called when terminating.
class Agent():

    def __init__(i, obsSpace, actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.96):
        i.obsSpace = np.array(obsSpace)
        i.actSpace = np.array(actSpace)
        i.ftrSpace = tuple(obsSpace) + tuple(actSpace)
        i.n_features = np.prod(i.ftrSpace)
        i.n_actions = actSpace[0]  # not general
        i.weights = np.zeros(i.n_features)
        i.trace = np.zeros(i.n_features)
        i.featureToIndexMap = np.arange(i.n_features).reshape(i.ftrSpace)
        i.allActions = list(range(i.n_actions))
        # new
        i.alpha = alpha      # how much to weigh reward surprises that deviate from expectation
        i.gamma = gamma      # how important expected rewards will be
        i.epsilon = epsilon  # fraction of exploration to exploitation (how often to choose a random action)
        i.lmbda = lmbda      # how important preceding actions are in learning adaptation
        i.sensoryState = np.zeros(len(i.obsSpace), dtype=np.int32)
        i.previousSensoryState = np.zeros(len(i.obsSpace), dtype=np.int32)
        i.action = 0
        i.previousAction = 0
        i.reward = -1  # matches reset(); plasticUpdate() reads this before any external assignment
        i.episoden = 0
        i.recentReset = True

    def reset(i):  # soft reset: clears traces, states, and actions, but keeps weights
        log.info("resetting agent")
        i.trace = np.zeros(i.n_features)
        i.sensoryState = np.zeros(len(i.obsSpace), dtype=np.int32)
        i.previousSensoryState = np.zeros(len(i.obsSpace), dtype=np.int32)
        i.action = 0
        i.previousAction = 0
        i.reward = -1
        i.recentReset = True

    def predictPayoffsForAllActions(i) -> List[float]:
        '''combines current sensoryState and all possible actions to return all possible payoffs by action
        >>> obsSpace, actSpace, ftrSpace = (2,2), (3,), (2,2)+(3,)
        >>> i = Agent(obsSpace=obsSpace, actSpace=actSpace)
        >>> (i.featureToIndexMap == np.arange(i.n_features).reshape((2,2,3))).all()
        True
        >>> i.sensoryState[:] = [1,0]
        >>> i.weights = np.zeros(12)
        >>> i.weights[6:9] = [1.,2.,3.]  # weights associated with features (1,0,<action>) with actions 0,1,2
        >>> i.predictPayoffsForAllActions()
        [1.0, 2.0, 3.0]
        '''
        try:
            featureKeys = [tuple(i.sensoryState) + (action,) for action in i.allActions]
            featuresForEachAction = [i.featureToIndexMap[fki] for fki in featureKeys]
            return [i.weights[features].sum() for features in featuresForEachAction]
        except Exception:
            estr = f"Error: {traceback.format_exc()}"
            print(estr)
            print('featureToIndexMap', i.featureToIndexMap)
            print('featureKeys', featureKeys)
            print('sensoryState', i.sensoryState, 'allActions', i.allActions)
            return [np.nan for x in range(len(i.allActions))]

    def plasticUpdate(i):
        # This algorithm is a TD-lambda algorithm
        # with epsilon-greedy action-selection
        # (could use annealing of the epsilon - I removed it again)

        # determine predicted payoff
        nextActionPredictedPayoff = 0.0  # used to find surprise between expected and received payoff
        nextAction = 0
        # epsilon-greedy action-selection
        if np.random.random() < i.epsilon:  # choose random
            nextAction = np.random.choice(i.n_actions)
        else:  # choose best
            try:
                q_vals = i.predictPayoffsForAllActions()
                nextAction = np.argmax(q_vals)
                if i.reward >= 0.0:  # goal achieved
                    nextActionPredictedPayoff = 0.0
                else:
                    nextActionPredictedPayoff = q_vals[nextAction]
            except Exception:
                estr = f"Error: {traceback.format_exc()}"
                print(estr)
                print("q_vals", q_vals)
        # only update weights if accumulated at least 1 experience
        if not i.recentReset:
            # determine the corrected payoff given the reward actually received
            previousActionCorrectedPayoff = i.reward + (nextActionPredictedPayoff * i.gamma)
            # use this information to update weights for the last action-selection based on how surprised we were
            features = i.featureToIndexMap[tuple(i.previousSensoryState) + (i.action,)]
            previousActionPredictedPayoff = i.weights[features].sum()
            surprise = previousActionCorrectedPayoff - previousActionPredictedPayoff
            # do weight updates
            i.trace[features] = 1.0
            # do trace updates
            i.weights += i.alpha * surprise * i.trace
            i.trace *= i.lmbda
        # keep track of state and action at t, t-1
        # (copy, since slicing a numpy array returns a view)
        i.previousSensoryState = np.array(i.sensoryState)
        i.action = nextAction
        i.recentReset = False

    def staticUpdate(i):
        # same as plasticUpdate, but without learning
        # (a.k.a. 'deployment')

        # greedy action-selection
        q_vals = i.predictPayoffsForAllActions()
        nextAction = np.argmax(q_vals)
        # step the storage of state and action in memory
        i.previousSensoryState = np.array(i.sensoryState)
        i.action = nextAction
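# Illustrative sketch (not part of the original file): how the update
# protocol above is typically driven. The `env` here is hypothetical but
# mirrors the gym-style (state, reward, done, info) step() used by the
# environments in this repo. Because plasticUpdate() consumes the reward
# for the *previous* action, one extra update is needed on termination.
#
#   agent = Agent(obsSpace=(2, 2), actSpace=(3,))
#   state = env.reset()
#   for t in range(max_steps):
#       agent.sensoryState = state                        # SET INPUTS
#       agent.plasticUpdate()                             # select action, learn from last reward
#       state, reward, done, _ = env.step(agent.action)   # GET OUTPUTS
#       agent.reward = reward                             # consumed by the NEXT plasticUpdate()
#       if done:
#           agent.plasticUpdate()                         # one extra update to learn from the final reward
#           break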
"""
|
||||
This derived class adds a mutation_rate attribute, as well as methods for mutation, crossover, and fitness handling. You can then use an evolutionary algorithm to evolve a population of EvolvableAgent instances by applying selection, crossover, and mutation operations based on the agents' fitness values.
|
||||
"""
|
||||
|
||||
def tuple_shape(input_tuple):
|
||||
if not isinstance(input_tuple, tuple):
|
||||
try:
|
||||
return input_tuple.shape
|
||||
except:
|
||||
raise TypeError("Input must be a tuple")
|
||||
|
||||
# Check if the tuple is nested (i.e., if it's a multidimensional tuple)
|
||||
if any(isinstance(item, tuple) for item in input_tuple):
|
||||
shape = []
|
||||
while isinstance(input_tuple, tuple):
|
||||
shape.append(len(input_tuple))
|
||||
input_tuple = input_tuple[0]
|
||||
return tuple(shape)
|
||||
else:
|
||||
return (len(input_tuple),)
|
||||
|
||||
class Holder(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
class EvolvableAgent(Agent):
|
||||
""" EvolvableAgent
|
||||
This class extends the Agent class from q_agent.py, adding functionality for evolutionary computation. The EvolvableAgent class can be used with evolutionary algorithms to optimize the agent's performance through mutation, crossover, and selection based on fitness values.
|
||||
|
||||
Usage:
|
||||
import EvolvableAgent
|
||||
|
||||
Class:
|
||||
EvolvableAgent(obsSpace, actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.96, mutation_rate=0.05)
|
||||
|
||||
Attributes (in addition to Agent attributes):
|
||||
mutation_rate (float): The probability of each weight being mutated during mutation.
|
||||
fitness (float): The fitness value of the agent, used for evaluation and selection in an evolutionary algorithm.
|
||||
|
||||
Methods (in addition to Agent methods):
|
||||
mutate():
|
||||
Mutates the agent's weights by adding small random values, drawn from a normal distribution. The mutation_rate attribute determines the probability of each weight being mutated.
|
||||
|
||||
csharp
|
||||
Copy code
|
||||
crossover(other: 'EvolvableAgent') -> 'EvolvableAgent':
|
||||
Performs uniform crossover between this agent and another agent, creating a new offspring agent.
|
||||
Args:
|
||||
other (EvolvableAgent): The other agent to perform crossover with.
|
||||
Returns:
|
||||
EvolvableAgent: The offspring agent resulting from the crossover.
|
||||
|
||||
set_fitness(fitness: float):
|
||||
Sets the fitness value for the agent.
|
||||
Args:
|
||||
fitness (float): The fitness value to be set.
|
||||
|
||||
get_fitness() -> float:
|
||||
Gets the fitness value of the agent.
|
||||
Returns:
|
||||
float: The fitness value of the agent.
|
||||
Examples:
|
||||
>>> from EvolvableAgent import EvolvableAgent
|
||||
>>> obsSpace, actSpace = (2, 2), (3,)
|
||||
>>> agent = EvolvableAgent(obsSpace=obsSpace, actSpace=actSpace, mutation_rate=0.05)
|
||||
"""
|
||||
def __init__(self, obsSpace, actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.96, \
|
||||
mutation_rate=0.05, crossover_rate=0.01, fitness=None):
|
||||
# obsSpace, actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.96
|
||||
super().__init__(obsSpace, actSpace, alpha, gamma, epsilon, lmbda)
|
||||
self.germline = self.weights
|
||||
self.mutation_rate = mutation_rate
|
||||
self.crossover_rate = crossover_rate
|
||||
self.wfitness = None
|
||||
self.fitness = fitness
|
||||
self.init_fitness = fitness
|
||||
|
||||
def mutate(self):
|
||||
"""
|
||||
Mutate the agent's weights by adding small random values, drawn from a normal distribution.
|
||||
The mutation_rate attribute determines the probability of each weight being mutated.
|
||||
"""
|
||||
wtshape = self.weights.shape
|
||||
glshape = self.germline.shape
|
||||
mutation_mask = np.random.random(self.germline.shape) < self.mutation_rate
|
||||
self.germline[mutation_mask] += np.random.normal(loc=0, scale=0.01, size=np.sum(mutation_mask))
|
||||
self.weights = self.germline
|
||||
assert glshape == self.germline.shape, "Error: mutate() germline shape has changed"
|
||||
assert wtshape == self.weights.shape, "Error: mutate() weights shape has changed"
|
||||
|
||||
def crossover(self, other: 'EvolvableAgent') -> 'EvolvableAgent':
|
||||
"""
|
||||
Perform uniform crossover between this agent and another agent, creating a new offspring agent.
|
||||
Args:
|
||||
other (EvolvableAgent): The other agent to perform crossover with.
|
||||
Returns:
|
||||
EvolvableAgent: The offspring agent resulting from the crossover.
|
||||
"""
|
||||
wtshape = self.weights.shape
|
||||
glshape = self.germline.shape
|
||||
offspring = EvolvableAgent(self.obsSpace, self.actSpace, self.alpha, self.gamma, self.epsilon, self.lmbda, self.mutation_rate, self.crossover_rate, self.init_fitness4)
|
||||
if np.random.random() <= self.crossover_rate:
|
||||
crossover_mask = np.random.randint(0, 2, size=self.germline.shape, dtype=bool)
|
||||
offspring.germline = np.where(crossover_mask, self.germline, other.germline)
|
||||
else:
|
||||
offspring.germline = self.germline
|
||||
offspring.weights = offspring.germline
|
||||
assert self.obsSpace.shape == offspring.obsSpace.shape, f"Error: offspring has different obsSpace {offspring.obsSpace} != {self.obsSpace}"
|
||||
assert self.actSpace.shape == offspring.actSpace.shape, f"Error: offspring has different actSpace {offspring.actSpace} != {self.actSpace}"
|
||||
assert tuple_shape(self.ftrSpace) == tuple_shape(offspring.ftrSpace), f"Error: offspring had different ftrSpace {offspring.ftrSpace} {offspring.obsSpace} {offspring.actSpace} != {self.ftrSpace} {self.obsSpace} {self.actSpace}"
|
||||
assert glshape == offspring.germline.shape, "Error: offspring germline shape has changed"
|
||||
assert wtshape == offspring.weights.shape, "Error: offspring weights shape has changed"
|
||||
return offspring
|
||||
|
||||
def set_wfitness(self, fitness: float):
|
||||
"""
|
||||
Set the fitnevss value for the agent.
|
||||
Args:
|
||||
fitness (float): The fitness value to be set.
|
||||
"""
|
||||
self.wfitness = fitness
|
||||
|
||||
def get_wfitness(self) -> float:
|
||||
"""
|
||||
Get the fitness value of the agent.
|
||||
Returns:
|
||||
float: The fitness value of the agent.
|
||||
"""
|
||||
return self.wfitness
|
||||
|
||||
def set_fitness(self, fitness: float):
|
||||
"""
|
||||
Set the fitness value for the agent.
|
||||
Args:
|
||||
fitness (float): The fitness value to be set.
|
||||
"""
|
||||
self.fitness.values = (fitness,)
|
||||
|
||||
def get_fitness(self) -> float:
|
||||
"""
|
||||
Get the fitness value of the agent.
|
||||
Returns:
|
||||
float: The fitness value of the agent.
|
||||
"""
|
||||
return self.fitness.values[0]
|
||||
|
||||
|
||||
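# Illustrative sketch (not part of the original file): one generation-style
# round trip with two EvolvableAgent parents. Values are hypothetical;
# fitness bookkeeping uses the raw-float set_wfitness()/get_wfitness() pair.
#
#   p1 = EvolvableAgent(obsSpace=(2, 2), actSpace=(3,), mutation_rate=0.05)
#   p2 = EvolvableAgent(obsSpace=(2, 2), actSpace=(3,), mutation_rate=0.05)
#   child = p1.crossover(p2)   # uniform crossover of germlines (with prob crossover_rate)
#   child.mutate()             # gaussian perturbation of ~mutation_rate of the weights
#   child.set_wfitness(0.7)    # e.g., a score returned by an environment's evaluate()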
if __name__ == '__main__':
    '''test important functions and workflows with doctesting.
    Run this python file by itself to run these tests, and set
    LOGGING=True near the top of the file.'''
    import doctest
    from functools import partial
    #doctest.testmod()
    test = partial(doctest.run_docstring_examples, globs=globals())
    test(Agent.predictPayoffsForAllActions)
@@ -0,0 +1,341 @@

"""
ew.py

Evolve Weights

Uses DEAP to evolve a set of weights with mutation and crossover.

Integration with other code happens via programming by contract.
The 'environ' parameter must be an object that provides two
methods:
    get_weights_len : returns a scalar integer indicating the 1D vector length for weights
    evaluate : accepts a weight vector, returns a tuple object containing a single fitness value (e.g., (0.5,))
and has an attribute related to reinforcement learning for agents:
    alpha
(See MinEnv at the bottom of this file for a minimal example of the contract.)
"""


import sys
# allow importing from the 'code/' dir
sys.path.append("../code")

import os
import platform
import pickle
import json
import traceback
import datetime
import copy

import numpy as np, itertools
import matplotlib.pyplot as plt
from collections import defaultdict
import importlib  # module reloading

#import environments
#import agents

# always forces a reload in case you have edited environments or agents
#importlib.reload(environments)
#importlib.reload(agents)
#from environments.gridworld import GridWorld
#import environments.puzzle as pz
#from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace

#from agents.q_agent import EvolvableAgent as Agent

# DEAP imports

import random
from deap import creator, base, tools, algorithms

import multiprocessing

#pool = multiprocessing.Pool()
#toolbox.register("map", pool.map)

# Weight handling
#from mda import MultiDimArray

def isotime():
    return datetime.datetime.now().isoformat()

def t2fn(timestamp):
    '''make an ISO timestamp safe for use in a filename'''
    timestamp = timestamp.replace('.', '_')
    timestamp = timestamp.replace(':', '_')
    return timestamp

class Holder(object):
    def __init__(self):
        pass

class EvolveWeights(object):
    """
    Class to apply DEAP to evolve a population consisting of a set
    of weights.
    """
    def __init__(self,
                 # environ, # Instance of environ class
                 # What is needed from environ?
                 #   weights_len (int)
                 #   alpha (float)
                 #   evaluate (method/function)
                 weights_len,
                 alpha=0.05,
                 evaluate=None,
                 popsize=100,
                 maxgenerations=10000,
                 cxpb=0.5,
                 mtpb=0.05,
                 wmin=-20.0,
                 wmax=20.0,
                 mut_center=0.0,
                 mut_sigma=0.1,
                 mut_indpb=0.05,
                 tournsize=5,
                 tournk=2,
                 normalize_fitness=True,
                 tag='environ'
                 ):

        self.tag = tag
        self.starttime = isotime()
        self.logbase = tag + "_" + t2fn(self.starttime)

        # Excluding environment as a parameter
        # self.environ = environ
        # Instead, we pass in weights_len, alpha, evaluate
        self.weights_len = weights_len  # environ.get_weights_len()
        self.alpha = alpha
        self.evaluate = evaluate

        self.popsize = popsize
        self.maxgenerations = maxgenerations
        self.cxpb = cxpb
        self.mtpb = mtpb
        self.wmin = wmin
        self.wmax = wmax
        self.mut_center = mut_center
        self.mut_sigma = mut_sigma
        self.mut_indpb = mut_indpb
        self.tournsize = tournsize
        self.tournk = tournk
        self.normalize_fitness = normalize_fitness

    def masv(self, pop):
        '''mean absolute weight per individual, scaled by the population's largest absolute weight'''
        mav = []
        maxs = []
        for ind in pop:
            wts = [x for x in ind]
            mav.append(np.mean(np.abs(wts)))
            maxs.append(np.max(np.abs(wts)))
        allmax = np.max(maxs)
        mymasv = [x / allmax for x in mav]
        return mymasv

    def cxTwoPointCopy(self, ind1, ind2):
        """Execute a two points crossover with copy on the input individuals. The
        copy is required because slicing in numpy returns a view of the data,
        which leads to a self overwriting in the swap operation. It prevents
        ::

            >>> import numpy as np
            >>> a = np.array((1,2,3,4))
            >>> b = np.array((5,6,7,8))
            >>> a[1:3], b[1:3] = b[1:3], a[1:3]
            >>> print(a)
            [1 6 7 4]
            >>> print(b)
            [5 6 7 8]
        """
        size = len(ind1)
        cxpoint1 = random.randint(1, size)
        cxpoint2 = random.randint(1, size - 1)
        if cxpoint2 >= cxpoint1:
            cxpoint2 += 1
        else:  # Swap the two cx points
            cxpoint1, cxpoint2 = cxpoint2, cxpoint1
        ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = ind2[cxpoint1:cxpoint2].copy(), ind1[cxpoint1:cxpoint2].copy()
        return ind1, ind2

    def zero(self):
        return 0.0

    def smallrandom(self, eps=None):
        """
        Produce a small random number in [-eps .. eps].

        A random variate in [-1 .. 1] is produced then
        multiplied by eps, so the final range is [-eps .. eps].
        """
        if eps is None:
            eps = self.alpha
        rv = ((2.0 * random.random()) - 1.0) * eps
        return rv

    def setup(self):
        creator.create("FitnessMax", base.Fitness, weights=(1.0,))
        creator.create("Individual", np.ndarray, fitness=creator.FitnessMax)

        self.toolbox = base.Toolbox()

        self.pool = multiprocessing.Pool()
        self.toolbox.register("map", self.pool.map)

        #toolbox.register("attr_bool", random.randint, 0, 1) # non-numpy non-float version
        #self.toolbox.register("attr_float", random.random)
        #self.toolbox.register("attr_float", self.zero)
        self.toolbox.register("attr_float", self.smallrandom)

        self.toolbox.register("individual", tools.initRepeat, creator.Individual, self.toolbox.attr_float, n=self.weights_len)
        self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)

        self.toolbox.register("evaluate", self.evaluate)
        #toolbox.register("mate", tools.cxTwoPoint) # non-numpy non-float version
        self.toolbox.register("mate", self.cxTwoPointCopy)
        #toolbox.register("mutate", tools.mutFlipBit, indpb=0.05) # non-numpy non-float version
        self.toolbox.register("mutate", tools.mutGaussian, mu=self.mut_center, sigma=self.mut_sigma, indpb=self.mut_indpb)
        self.toolbox.register("select", tools.selTournament, tournsize=self.tournsize, k=self.tournk)

    def normalize_fitnesses(self, fitnesses):
        '''rescale a list of 1-tuples so the maximum fitness is 1.0'''
        maxfitness = np.max([x[0] for x in fitnesses])
        listfit = [x[0] for x in fitnesses]
        normfit = [x / maxfitness for x in listfit]
        fitnesses = [tuple([x]) for x in normfit]
        return fitnesses

    def log_it(self, generation):
        # the pool and toolbox are not picklable, so detach them,
        # pickle self as a checkpoint, then reattach them
        pool = self.pool
        toolbox = self.toolbox
        self.pool = None
        self.toolbox = None
        pklfn = f"{self.logbase}__{generation+1}-{self.maxgenerations}.pkl"
        pickle.dump(self, open(pklfn, "wb"))
        self.pool = pool
        self.toolbox = toolbox
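    # Illustrative sketch (not part of the original file): reloading a
    # checkpoint written by log_it(). The filename is hypothetical; pool and
    # toolbox are detached before pickling, so call setup() again to rebuild
    # them before resuming.
    #
    #   import pickle
    #   ew = pickle.load(open("environ_2023-01-01T00_00_00__101-10000.pkl", "rb"))
    #   ew.setup()   # reattach pool/toolbox (not stored in the pickle)
    #   ew.loop()    # resume evolving from the restored population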
    def loop(self):
        self.population = self.toolbox.population(n=self.popsize)
        #print(self.masv(self.population))
        NGEN = self.maxgenerations
        for gen in range(NGEN):
            print("generation", gen)
            offspring = algorithms.varAnd(self.population, self.toolbox, cxpb=self.cxpb, mutpb=self.mtpb)
            # constrain genome values to [wmin, wmax]
            # (assign back: np.clip returns a new array rather than clipping in place)
            for offspring_i, individual in enumerate(offspring):
                offspring[offspring_i][:] = np.clip(offspring[offspring_i], self.wmin, self.wmax)
            # Evaluate the individuals with an invalid fitness (not yet evaluated)
            invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
            fitnesses = self.toolbox.map(self.toolbox.evaluate, invalid_ind)
            if self.normalize_fitness:
                fitnesses = self.normalize_fitnesses(fitnesses)
            print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
            self.fitness_dist(fitnesses)
            # update individual fitnesses
            for ind, fit in zip(invalid_ind, fitnesses):
                ind.fitness.values = fit
            # selection
            self.offspring = offspring
            self.population = self.toolbox.select(offspring, k=len(self.population))
            if 0 == gen % 100:
                self.log_it(gen)

            # report for this generation
            self.report()

    def report(self):
        # post-evolution analysis
        fitnesses = self.toolbox.map(self.toolbox.evaluate, self.population)
        if self.normalize_fitness:
            fitnesses = self.normalize_fitnesses(fitnesses)
        self.fitnesses = fitnesses
        self.sortedFitnesses = sorted(fitnesses)
        self.sortedFitnesses.reverse()
        self.fitness_dist(fitnesses)

        self.bestFitness, self.worstFitness = self.sortedFitnesses[0], self.sortedFitnesses[-1]
        print("best/worst w", self.bestFitness, self.worstFitness)

        self.bestGenome = tools.selBest(self.population, k=1)

    def ffmt(self, value, fmt="%3.2f"):
        return fmt % value

    def fitness_dist(self, fitnesses):
        listfit = [x[0] for x in fitnesses]
        # np.percentile expects percentages in [0, 100]
        pct05, pct25, pct50, pct75, pct95 = np.percentile(listfit, [5, 25, 50, 75, 95])
        print(f"fitness dist: {self.ffmt(np.min(listfit))} {self.ffmt(pct05)} {self.ffmt(pct25)} {self.ffmt(pct50)} {self.ffmt(pct75)} {self.ffmt(pct95)} {self.ffmt(np.max(listfit))}")

    def driver(self):
        # Initialize
        self.setup()
        # Generation loop
        self.loop()
        # Report
        self.report()
        self.log_it(self.maxgenerations)
        print(self.masv(self.population))
        self.pool.close()

def normalized(a, axis=-1, order=2):
    l2 = np.atleast_1d(np.linalg.norm(a, order, axis))
    l2[l2 == 0] = 1
    return a / np.expand_dims(l2, axis)

def normalize(v):
    if 0 == len(v):
        return np.nan
    return v / np.linalg.norm(v)

class MinEnv(object):
    '''minimal environment satisfying the EvolveWeights contract:
    get_weights_len(), evaluate(), and an alpha attribute'''
    def __init__(self, wt_len=12, alpha=0.01, w=0.5):
        self.alpha = alpha
        self.wt_len = wt_len
        self.w = w
    def get_weights_len(self):
        return self.wt_len
    def evaluate(self, wts):
        mywts = np.array([float(x) for x in wts])
        # Max entropy; note the trailing comma: the contract requires a 1-tuple
        return np.std(normalize(mywts)) / 0.30,

def test_ew():
    env1 = MinEnv()

    # EvolveWeights no longer takes the environ object itself;
    # pass in the pieces of the contract instead
    ew = EvolveWeights(weights_len=env1.get_weights_len(),
                       alpha=env1.alpha,
                       evaluate=env1.evaluate,
                       popsize=100, maxgenerations=10, tournsize=75, tournk=3,
                       normalize_fitness=False)
    ew.driver()

if __name__ == "__main__":
    print("ew.py start...")

    test_ew()

    print("ew.py done.")
@@ -0,0 +1,355 @@

import sys
# allow importing from the 'code/' dir
sys.path.append("../code")

import os
import platform
import pickle
import json
import traceback
import datetime
import copy

import numpy as np  # , itertools, copy
import matplotlib.pyplot as plt
from collections import defaultdict
import importlib  # module reloading

import environments
import agents

# always forces a reload in case you have edited environments or agents
importlib.reload(environments)
importlib.reload(agents)
#from environments.gridworld import GridWorld
import environments.puzzle as pz
from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace

from agents.q_agent import EvolvableAgent as Agent

# DEAP imports

import random
from deap import creator, base, tools, algorithms

import multiprocessing

#pool = multiprocessing.Pool()
#toolbox.register("map", pool.map)

# Weight handling
from mda import MultiDimArray

# RESS
from ress import RESS

# EvolveWeights
# from ew import EvolveWeights
from curio_evolve_weights import EvolveWeights

# Experiment
from curio_experiment import Experiment

def isotime():
    return datetime.datetime.now().isoformat()

def t2fn(timestamp):
    '''make an ISO timestamp safe for use in a filename'''
    timestamp = timestamp.replace('.', '_')
    timestamp = timestamp.replace(':', '_')
    return timestamp

class Holder(object):
    def __init__(self):
        pass

if (1):
    # "tt" entries are transition tables: tt[state][action] -> next state
    # (see Puzzle.transition in environments/puzzle.py)
    unambiguous_puzzle_spec = {
        "puzzle_set_description": "Unambiguous puzzle set with 1 good, 1 bad puzzle",
        "puzzles": [
            {
                "puzzle_description": "Appetitive puzzle",
                "tt": [[0,0,2],   # state 0: first presentation
                       [0,0,0],   # state 1: getting passed over (placeholder)
                       [2,2,2]],  # state 2: consumed (saturating)
                "features": [[2],   # state 0: Green
                             [2],   # state 1: Green (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,   # state 0: first look
                            -1,   # state 1: proceeding to next puzzle (placeholder)
                             1],  # state 2: consume (reward)
            },
            {
                "puzzle_description": "Aversive puzzle",
                "tt": [[0,0,2],   # state 0: first presentation
                       [0,0,0],   # state 1: getting passed over (placeholder)
                       [2,2,2]],  # state 2: consumed (saturating)
                "features": [[1],   # state 0: Red
                             [1],   # state 1: Red (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,   # state 0: first look
                            -1,   # state 1: proceeding to next puzzle (placeholder)
                            -2],  # state 2: consume (punishment)
            },
        ]
    }

    # In the ambiguous set, both puzzles show Red, so features alone
    # cannot distinguish appetitive from aversive
    ambiguous_puzzle_spec = {
        "puzzle_set_description": "Ambiguous puzzle set with 1 good, 1 bad puzzle.",
        "puzzles": [
            {
                "puzzle_description": "Appetitive puzzle",
                "tt": [[0,0,2],   # state 0: first presentation
                       [0,0,0],   # state 1: getting passed over (placeholder)
                       [2,2,2]],  # state 2: consumed (saturating)
                "features": [[1],   # state 0: Red
                             [1],   # state 1: Red (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,   # state 0: first look
                            -1,   # state 1: proceeding to next puzzle (placeholder)
                             1],  # state 2: consume (reward)
            },
            {
                "puzzle_description": "Aversive puzzle",
                "tt": [[0,0,2],   # state 0: first presentation
                       [0,0,0],   # state 1: getting passed over (placeholder)
                       [2,2,2]],  # state 2: consumed (saturating)
                "features": [[1],   # state 0: Red
                             [1],   # state 1: Red (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,   # state 0: first look
                            -1,   # state 1: proceeding to next puzzle (placeholder)
                            -2],  # state 2: consume (punishment)
            },
        ]
    }

    specdict = {
        'unambiguous_puzzle_spec': unambiguous_puzzle_spec,
        'ambiguous_puzzle_spec': ambiguous_puzzle_spec,
    }


    exp_schedule = {
        "setlist": [
            {
                "desc": "Initial puzzle set",
                "specs": ['unambiguous_puzzle_spec'],
                "turns": 50,       # How many turns for 'lifetime learning'
                                   # Needs to be passed to the agent
                "num_stimuli": 6,  # How many puzzles? Or how many different features?
                                   # Might just be the number of 'features' in the puzzle spec
                                   # We do not need to manually specify puzzle feature number
                "sequence_type": "fixed",
                "probs": [[1.0], [1.0]]
            },
            {
                "desc": "Stochastic puzzle sets",
                "specs": ['unambiguous_puzzle_spec', 'ambiguous_puzzle_spec'],
                "turns": 200,
                "num_stimuli": 6,
                "sequence_type": "stochastic",
                "probs": [[1.0, 0.0], [0.0, 1.0]]
            },
        ]
    }


def make_puzzle_list(*args, **kwargs):
    """
    Build a list of Puzzle instances from a puzzle spec dict and an
    experiment schedule. (Unfinished: the per-set sequencing below is
    still stubbed out; see the sketch after this function.)
    """
    # Sanity checks
    req_params = ['specdict', 'schedule']

    paramsvalid = True

    for rpi in req_params:
        if not rpi in kwargs:
            paramsvalid = False
            print("make_puzzle_list missing", rpi)
    assert paramsvalid, "Error: Missing a required parameter. Quitting."

    specdict = kwargs['specdict']
    schedule = kwargs['schedule']

    puzzles = []

    upress = RESS()  # Random Equal Stimulus Sets instance

    for seti in schedule['setlist']:
        num_sets = len(seti['specs'])
        num_stimuli = seti['num_stimuli']
        num_turns = seti['turns']
        seqtype = seti['sequence_type']
        probs = seti['probs']

        if 1 == num_sets:
            # Simple, just repeat the puzzle num_stimuli * times
            pass
        else:
            pass
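# Illustrative sketch (not part of the original file) of what the stubbed
# branches above might do, following the same pattern used in
# exp1_environment() below: draw a Gellermann-style index series with RESS,
# then instantiate one Puzzle per index. RESS.newress's exact semantics are
# assumed from its use below.
#
#   spec = specdict[seti['specs'][0]]                     # single-set case
#   series = upress.newress(num_stimuli, len(spec['puzzles']))
#   for stimi in series:
#       p = spec['puzzles'][int(stimi)]
#       puzzles.append(Puzzle(tt=np.array(p['tt']),
#                             features=p['features'],
#                             rewards=p['rewards']))
#   # the multi-set case would additionally pick a spec per turn using 'probs'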
def exp1_environment(*args, **kwargs):
    '''build the ConvBelt world for experiment 1'''

    # The puzzle specs and schedule here are identical to the module-level
    # unambiguous_puzzle_spec / ambiguous_puzzle_spec / exp_schedule defined
    # above, so those are reused rather than redefined verbatim.

    # Notion: Have an object to define a schedule of presentation of
    # environments, with the ability to stochastically present one of
    # a list of environments.

    if 'num_puzzles_on_belt' in kwargs:
        num_puzzles_on_belt = kwargs['num_puzzles_on_belt']
    else:
        num_puzzles_on_belt = 6

    pz = unambiguous_puzzle_spec
    if (1):
        maxrewards = [1]
        # Produce Gellermann sequence
        upress = RESS()
        print(dir(upress))
        print(pz['puzzles'])
        print(len(pz['puzzles']))
        upseries = upress.newress(num_puzzles_on_belt, len(pz['puzzles']))
        print("upseries", upseries)
        # Create puzzle sequence
        # call to make_puzzle_list goes about here

        # Instantiate puzzles per Gellermann sequence
        puzzles = []
        for stimi in upseries:
            stimn = int(stimi)
            myp = Puzzle(tt=np.array(pz['puzzles'][stimn]['tt']),
                         features=pz['puzzles'][stimn]['features'],
                         rewards=pz['puzzles'][stimn]['rewards']
                         )
            puzzles.append(myp)
        # Create conveyor belt
        world = ConvBelt(actionSpace=getActionSpace(puzzles),
                         observationSpace=getObservationSpace(puzzles),
                         maxRewards=maxrewards,
                         agentclass=Agent,
                         randomize=False, alpha=0.005)
        # Add puzzles
        for pi in puzzles:
            world.append(pi)

    return world


def do_experiment():
    # Experiment instance
    print('creating myexp')
    myexp = Experiment()
    print('setting agentclass')
    myexp.set_agentclass(Agent)
    print('setting environclass')
    myexp.set_environclass(ConvBelt)
    print('setting evolverclass')
    myexp.set_evolverclass(EvolveWeights)
    print('setting evolver_attributes')
    myexp.set_evolver_attributes()  # defaults
    print('setting environ_maker')
    myexp.set_environ_maker(exp1_environment)  # sets the function
    print('making environment')
    myexp.make_environ()  # calls the function
    print('making evolver_instance')
    myexp.make_evolver_instance()
    if myexp.validate():
        print('running driver')
        myexp.evolver.driver()
    else:
        print("Experiment failed to validate.")


if __name__ == "__main__":

    print("exp1.py start...")

    do_experiment()

    print("exp1.py done.")
@@ -0,0 +1,192 @@

"""
experiment.py

Curiosity project Experiment class definition.

Aim for better encapsulation.

Experiment class
- This class should get the various classes to use in running an experiment
  - EvolveWeights
  - mda?
  - Environ (GridWorld, ConvBelt, Puzzle)
    - Still going to require an ad hoc function to create the particular Environ
    - But that function can be passed in
  - Agentclass
- And experimental attributes
- For example
  - Experiment constructs an EW instance, passes in the weight length
  - Experiment constructs an Environ instance
  - Experiment requests an evolution run of EW with parameters
  - EW calls Experiment for each evaluation of an individual (and in what generation)
  - Experiment calls Environ.evaluate with the individual's weights and agentclass
  - Passes the fitness tuple (w,) back to EW

"""

import sys
import os
import traceback

class Holder(object):
    def __init__(self):
        pass

class Experiment(object):
    """
    Experiment class. Instances drive reinforcement learning experiments;
    see do_experiment() in exp1.py for a full usage example.
    """
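    # Illustrative sketch (not part of the original file), mirroring
    # do_experiment() in exp1.py (Agent, ConvBelt, EvolveWeights, and
    # exp1_environment are imported there):
    #
    #   myexp = Experiment()
    #   myexp.set_agentclass(Agent)            # e.g., agents.q_agent.EvolvableAgent
    #   myexp.set_environclass(ConvBelt)
    #   myexp.set_evolverclass(EvolveWeights)
    #   myexp.set_evolver_attributes()         # defaults
    #   myexp.set_environ_maker(exp1_environment)
    #   myexp.make_environ()
    #   myexp.make_evolver_instance()
    #   if myexp.validate():
    #       myexp.evolver.driver()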
    def __init__(self):
        self.agentclass = None
        self.environclass = None
        self.evolverclass = None
        self.environmaker = None

    def validate(self):
        valid = True
        # Test that we have classes to use
        valid = valid and (self.agentclass is not None)
        valid = valid and (self.environclass is not None)
        valid = valid and (self.evolverclass is not None)
        # Test other values here
        return valid

    def set_schedule(self, schedule):
        self.schedule = schedule

    def set_environ_maker(self, environmaker):
        self.environmaker = environmaker

    def make_environ(self):
        if self.environmaker is not None:
            try:
                self.environ = self.environmaker()
            except Exception:
                estr = f"Error: {traceback.format_exc()}"
                print(estr)
                self.environ = None
                assert 0, "Creating environment failed. Quitting."

    def set_agentclass(self, agentclass):
        # Test class for compatibility
        okclass = True
        # No test yet

        if okclass:
            self.agentclass = agentclass

    def get_agentclass(self):
        return self.agentclass

    def set_environclass(self, environclass):
        # Test class for compatibility
        okclass = True

        if not 'evaluate' in dir(environclass):
            okclass = False
            print("set_environclass error: class does not provide 'evaluate'")

        if okclass:
            self.environclass = environclass

    def get_environclass(self):
        return self.environclass

    def set_evolverclass(self, evolverclass):
        # Test class for compatibility
        okclass = True

        if not 'driver' in dir(evolverclass):
            okclass = False
            print("set_evolverclass error: class does not provide 'driver'")

        if okclass:
            self.evolverclass = evolverclass

    def set_agent_attributes(self, alpha=0.005):
        self.agent_props = Holder()
        self.agent_props.alpha = alpha  # was hard-coded 0.005, ignoring the parameter

    def set_evolver_attributes(self,
                               popsize=100,
                               maxgenerations=10000,
                               cxpb=0.5,
                               mtpb=0.05,
                               wmin=-20.0,
                               wmax=20.0,
                               mut_center=0.0,
                               mut_sigma=0.1,
                               mut_indpb=0.05,
                               tournsize=5,
                               tournk=2,
                               normalize_fitness=True,
                               tag='environ'
                               ):
        self.evolver_props = Holder()
        self.evolver_props.popsize = popsize
        self.evolver_props.maxgenerations = maxgenerations
        self.evolver_props.cxpb = cxpb
        self.evolver_props.mtpb = mtpb
        self.evolver_props.wmin = wmin
        self.evolver_props.wmax = wmax
        self.evolver_props.mut_center = mut_center
        self.evolver_props.mut_sigma = mut_sigma
        self.evolver_props.mut_indpb = mut_indpb
        self.evolver_props.tournsize = tournsize
        self.evolver_props.tournk = tournk
        self.evolver_props.normalize_fitness = normalize_fitness
        self.evolver_props.tag = tag

    def make_evolver_instance(self):
        self.evolver = self.evolverclass(
            # self.environclass,
            weights_len=self.environ.get_weights_len(),
            alpha=self.environ.alpha,
            evaluate=self.environ.evaluate,
            popsize=self.evolver_props.popsize,
            maxgenerations=self.evolver_props.maxgenerations,
            cxpb=self.evolver_props.cxpb,
            mtpb=self.evolver_props.mtpb,
            wmin=self.evolver_props.wmin,
            wmax=self.evolver_props.wmax,
            mut_center=self.evolver_props.mut_center,
            mut_sigma=self.evolver_props.mut_sigma,
            mut_indpb=self.evolver_props.mut_indpb,
            tournsize=self.evolver_props.tournsize,
            tournk=self.evolver_props.tournk,
            normalize_fitness=self.evolver_props.normalize_fitness,
            tag=self.evolver_props.tag
        )

    def set_env_attributes(self):
        self.env_props = Holder()

    def handle_evaluation(self, ind, generation):
        """
        The evolver calls this to get an evaluation of an
        individual.

        Depending on the experiment schedule and generation,
        this may require constructing a new environment.
        """
        pass

    def run_experiment(self):
        """
        # Run experiment
        ew = EvolveWeights(world,
                           popsize=100,
                           maxgenerations=1000,
                           tournsize=75,
                           tournk=3,
                           normalize_fitness=False)
        ew.driver()
        """
@@ -0,0 +1,93 @@

# custom version of openAI's gridworld
# to support arbitrary holes

from typing import Tuple, List, Any

class GridWorld:
    def __init__(self, dims, startState=[0,0]):
        self.height = dims[0]
        self.width = dims[1]
        self.startState = startState
        self.state = self.startState[:]
        self.holes = []   # list of [row, col] cells that incur a penalty
        self.goals = []   # list of [row, col] terminal cells
    def reset(self):
        '''returns an initial observation while also resetting the environment'''
        self.state = self.startState[:]
        return self.state
    def step(self, action) -> Tuple[Tuple[int], float, bool, Any]:
        # actions: 0=up, 1=right, 2=down, 3=left (rows grow downward)
        delta = [0,0]
        if (action == 0): delta[0] = -1
        elif (action == 2): delta[0] = 1
        elif (action == 1): delta[1] = 1
        else: delta[1] = -1
        newstate = [self.state[0]+delta[0], self.state[1]+delta[1]]
        newstate[0] = min(max(0, newstate[0]), self.height-1)
        newstate[1] = min(max(0, newstate[1]), self.width-1)
        self.state = newstate
        # set default returns
        reward = -1.0
        goalFound = False
        # check for goal
        if self.state in self.goals:
            goalFound = True
            reward = 0.0
        elif self.state in self.holes:
            reward = -10.0
        # openAI gym format: (state, reward, goalAchieved, DebugVisInfo)
        return (self.state, reward, goalFound, None)
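# Illustrative sketch (not part of the original file): a small world with a
# hole and a goal. Cells are [row, col]; goals end the episode with reward
# 0.0, holes cost -10.0, every other step costs -1.0.
#
#   env = GridWorld((4, 5))          # 4 rows x 5 columns, start at [0, 0]
#   env.goals.append([3, 4])
#   env.holes.append([1, 1])
#   state = env.reset()
#   state, reward, done, _ = env.step(2)   # move down -> [1, 0], reward -1.0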
def render(env, brain):
    # renders a gridworld environment
    # and plots the agent's path
    import numpy as np
    import matplotlib.pyplot as plt
    path = []
    brain.reset()  # Warning!!: NOT MABE-reset(), but soft-reset() (keeps weights)
    nextState = env.reset()
    dims = [env.height, env.width, 4]
    path.append(nextState)
    time = 0
    while True:
        time += 1
        brain.sensoryState = nextState  # SET INPUTS
        brain.plasticUpdate()
        nextState, reward, goal_achieved, _ = env.step(brain.action)  # GET OUTPUTS
        path.append(nextState)
        if goal_achieved or time == 100: break
        brain.reward = reward
    y, x = zip(*path)
    x, y = (np.array(x)+0.5, np.array(y)+0.5)
    # setup figure
    plt.figure(figsize=(dims[1], dims[0]))
    # plot landmarks
    hasGoals = False
    goals = []
    hasHoles = False
    holes = []
    try: goals = env.goals
    except AttributeError: pass
    else: hasGoals = True
    try: holes = env.holes
    except AttributeError: pass
    else: hasHoles = True
    if hasGoals:
        for goal in goals:
            newrec = plt.Rectangle((goal[1], goal[0]), 1, 1, color='green', edgecolor=None, linewidth=2.5, alpha=0.7)
            plt.gca().add_patch(newrec)
    if hasHoles:
        for hole in holes:
            newrec = plt.Rectangle((hole[1], hole[0]), 1, 1, color='orange', edgecolor=None, linewidth=2.5, alpha=0.7)
            plt.gca().add_patch(newrec)
    plt.plot(x, y, color='gray')
    plt.scatter(x[0], y[0], s=64, color='green')
    plt.scatter(x[-1], y[-1], s=64, color='red')
    plt.grid(linestyle='--')
    plt.ylim([0, dims[0]])
    plt.xlim([0, dims[1]])
    plt.gca().set_yticks(list(range(dims[0])))
    plt.gca().set_xticks(list(range(dims[1])))
    plt.gca().invert_yaxis()
    # print out location history
    print(' '.join([str(x)+','+str(y) for x, y in path]))
@@ -0,0 +1,494 @@

"""
puzzle.py

Puzzle and ConvBelt environment classes for the curiosity experiments.
"""

import numpy as np, itertools
from random import shuffle
from typing import List, Tuple, Union, Any
import copy
#import gym, gym_gridworlds # if using other environments


# overridden in agent.py, typically due to load order
LOGGING = True

import logging, sys
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
log = logging.getLogger()

if not LOGGING:
    # remove all logging functionality
    for handler in log.handlers.copy():
        try:
            log.removeHandler(handler)
        except ValueError:  # in case another thread has already removed it
            pass
    log.addHandler(logging.NullHandler())
    log.propagate = False

class Puzzle:

    __slots__ = [
        'tt',               # (list[list[int]]) - transition table: tt[state][action] -> next state
        'features',         # (list[list[int]]) - observable features per state
        'rewards',          # (list[float])     - reward per state
        'state',
        'initialState',
        'solved',
        'solvable',
        'maxrewards',
        'originalrewards']

    def __init__(self, tt:List[List[int]], features:List[int], rewards:List[float], initialState:int = 0):
        self.tt = tt
        self.features = features
        self.rewards = rewards[:]
        self.originalrewards = rewards
        self.state = 0
        self.initialState = initialState
        self.solved = False

    def __str__(self) -> str:
        output = ""
        output += "transition table:\n"
        for row in self.tt:
            output += f"  {str(row)}\n"
        output += f"solved: {self.solved}\n"
        output += f"state: {self.state}\n"
        output += f"features: {self.features}\n"
        output += f"rewards: {self.rewards}\n"
        return output

    def reset(self):
        '''must be called before first use'''
        self.solved = False
        self.state = self.initialState
        self.rewards = self.originalrewards[:]

    def setMaxRewards(self, maxRewards):
        '''typically used by the ConvBelt class before reset()'''
        self.maxrewards = set(self.rewards) & set(maxRewards)
        self.solvable = bool(self.maxrewards)

    def transition(self, action:int) -> Tuple[float, List[int], bool]:
        self.state = self.tt[self.state][action]
        finished = False
        reward = self.rewards[self.state]
        if self.rewards[self.state] in self.maxrewards:
            self.rewards[self.state] = -1  # 'eat' the food and replace with empty reward
            finished = True
            self.solved = True
        return (reward, self.features[self.state], finished)

    def getFeatures(self) -> List[int]:
        '''returns only the current observable features of the puzzle'''
        return self.features[self.state]


def Action(index:Union[int,str]) -> Union[str,int]:
    ''' action str <-> int    Action('pass')->1    Action(1)->'pass' '''
    if isinstance(index, (int, np.int64)):
        return ('idle','pass','investigate','eat')[index]
    return {'idle':0,'pass':1,'investigate':2,'eat':3}[index]
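# Illustrative sketch (not part of the original file): driving one Puzzle
# directly, using the "Appetitive puzzle" spec from exp1.py. setMaxRewards()
# must run before reset() so the puzzle knows which rewards count as solving it.
#
#   p = Puzzle(tt=[[0,0,2],[0,0,0],[2,2,2]],
#              features=[[2],[2],[0]],
#              rewards=[-1,-1,1])
#   p.setMaxRewards([1])   # reward 1 marks this puzzle as solvable
#   p.reset()
#   reward, features, done = p.transition(2)   # tt[0][2] -> state 2 ("consume")
#   # reward == 1, features == [0], done == True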
class ConvBelt:
|
||||
"""
|
||||
__slots__ = [
|
||||
'puzzles', # (list[Puzzle]) - list of puzzles, use append()
|
||||
'pi', # (int) - currently selected puzzle / "puzzle index"
|
||||
'puzzle', # (ref:Puzzle) - shortcut for self.puzzles[pi]
|
||||
'randomize', # (bool) - shuffling of puzzles between trials
|
||||
'maxrewards', # (list[float]) - the maximum achievable rewards
|
||||
'action_space', # (tuple[int]) - number of actions available to agents, usually (4,)
|
||||
'observation_space', # (tuple[int]) - features/dimensions given to agents (dim1 size, dim2 size...)
|
||||
'puzzlesLeftToComplete', # (int) - faster tracking of how many are left, when 0 set self.solved
|
||||
'solved', # (bool) - state flag for all puzzles solved (trial can be over)
|
||||
'agentclass',
|
||||
'killed_reward',
|
||||
'max_training_trials',
|
||||
'max_steps',
|
||||
'alpha',
|
||||
'gamma',
|
||||
'epsilon',
|
||||
'lmbda',
|
||||
#'get_weights_len',
|
||||
#'reset',
|
||||
#'extend',
|
||||
#'clear',
|
||||
]
|
||||
"""
|
||||
|
||||
def __init__(self,actionSpace,observationSpace,maxRewards, agentclass,
|
||||
killed_reward=-10.0, max_training_trials=50, max_steps=32,
|
||||
alpha=0.01, gamma=0.95, epsilon=0.01, lmbda=0.42, randomize=False):
|
||||
'''please provide entire actionSpace, observationSpace, maxRewards for all puzzles
|
||||
even those later added this environment'''
|
||||
self.puzzles = []
|
||||
self.pi = 0
|
||||
self.puzzle = None
|
||||
self.randomize = randomize
|
||||
self.action_space = actionSpace
|
||||
self.observation_space = observationSpace
|
||||
self.maxrewards = maxRewards
|
||||
self.puzzlesLeftToComplete = 0
|
||||
self.solved = False
|
||||
|
||||
self.agentclass = agentclass
|
||||
self.killed_reward = killed_reward
|
||||
self.max_training_trials = max_training_trials
|
||||
self.max_steps = max_steps
|
||||
self.alpha = alpha
|
||||
self.gamma = gamma
|
||||
self.epsilon = epsilon
|
||||
self.lmbda = lmbda
|
||||
|
||||
print(self.get_weights_len())
|
||||
|
||||
def get_weights_len(self):
|
||||
"""
|
||||
Return the length of weights needed for an agent.
|
||||
"""
|
||||
print("in ConvBelt.get_weights_len")
|
||||
mywl = np.prod(tuple(self.observation_space) + tuple(self.action_space))
|
||||
return mywl
|
||||
|
||||
def reset(self):
|
||||
'''returns an initial observation while also resetting the environment'''
|
||||
log.info("resetting all puzzles")
|
||||
self.puzzlesLeftToComplete = 0
|
||||
for puzzle in self.puzzles:
|
||||
puzzle.reset()
|
||||
if puzzle.solvable:
|
||||
self.puzzlesLeftToComplete += 1
|
||||
self.solved = not bool(self.puzzlesLeftToComplete)
|
||||
if self.randomize: shuffle(self.puzzles)
|
||||
self.pi = 0
|
||||
if len(self.puzzles) == 0:
|
||||
raise Exception("Please add puzzles to the belt/env first using append() or extend()")
|
||||
self.puzzle = self.puzzles[self.pi]
|
||||
return self.puzzle.getFeatures()
|
||||
|
||||
def append(self, newPuzzle:Puzzle):
|
||||
log.info("adding new puzzle")
|
||||
newPuzzle.setMaxRewards(self.maxrewards)
|
||||
newPuzzle.reset()
|
||||
if newPuzzle.solvable:
|
||||
self.puzzlesLeftToComplete += 1
|
||||
self.solved = False
|
||||
self.puzzles.append(newPuzzle)
|
||||
if self.puzzle is None:
|
||||
self.reset()
|
||||
|
||||
def extend(self, newPuzzles:List[Puzzle]):
|
||||
log.info(f"adding {len(newPuzzles)} new puzzles")
|
||||
oldLength = len(self.puzzles)
|
||||
self.puzzles.extend(newPuzzles)
|
||||
newLength = len(self.puzzles)
|
||||
for puzzle_i in range(oldLength, newLength):
|
||||
puzzle = self.puzzles[puzzle_i]
|
||||
puzzle.setMaxRewards(self.maxrewards)
|
||||
puzzle.reset()
|
||||
if puzzle.solvable:
|
||||
self.puzzlesLeftToComplete += 1
|
||||
self.solved = False
|
||||
if self.puzzle is None:
|
||||
self.reset()
|
||||
|
||||
def _post_removal(self):
|
||||
if len(self.puzzles) == 0:
|
||||
self.puzzle = None
|
||||
log.info("puzzles list now empty")
|
||||
if self.pi >= len(self.puzzles):
self.pi = 0
log.info("resetting index to 0")
if self.puzzles:
self.puzzle = self.puzzles[self.pi] # keep the shortcut in sync after removal
|
||||
|
||||
def clear(self):
|
||||
'''clears the belt of puzzles'''
|
||||
self.puzzles.clear()
|
||||
log.info("removed ALL puzzles")
|
||||
self.puzzlesLeftToComplete = 0
|
||||
self._post_removal()
|
||||
|
||||
def remove(self, puzzle):
|
||||
'''removes puzzle from belt of puzzles'''
|
||||
if puzzle.solvable:
|
||||
self.puzzlesLeftToComplete -= 1
|
||||
self.puzzles.remove(puzzle)
|
||||
log.info("removed puzzle")
|
||||
self._post_removal()
|
||||
|
||||
def pop(self, index=None):
|
||||
'''removes puzzle at index or from end'''
|
||||
if index is None:
|
||||
index = -1
|
||||
puzzle = self.puzzles.pop(index)
|
||||
if puzzle.solvable:
|
||||
self.puzzlesLeftToComplete -= 1
|
||||
log.info(f"popped puzzle at index {index}")
|
||||
self._post_removal()
|
||||
|
||||
def _completed_a_puzzle(self):
|
||||
self.puzzlesLeftToComplete -= 1
|
||||
log.info(f"completed a puzzle - {self.puzzlesLeftToComplete} solvable puzzles remain")
|
||||
if self.puzzlesLeftToComplete == 0:
|
||||
self.solved = True
|
||||
log.info(f"all puzzles completed - trial complete")
|
||||
|
||||
def step(self, action:int) -> Tuple[List[int], float, bool, Any]: # returns (state,reward,goal,_) (gym format)
|
||||
if action == 1: # pass (change to next puzzle, and change no puzzle's state)
|
||||
self.pi = (self.pi + 1) % len(self.puzzles)
|
||||
# reports states of old and new puzzles instead of a transition
|
||||
log.info(f"(puzzle-step) action {action} ({Action(action)}) from old puzzle state {self.puzzle.state} to new puzzle state {self.puzzles[self.pi].state}")
|
||||
self.puzzle = self.puzzles[self.pi]
|
||||
return (self.puzzle.features[self.puzzle.state], # features
|
||||
-1, # reward of a pass
|
||||
#self.puzzle.rewards[self.puzzle.state], # reward
|
||||
self.solved, # done-flag
|
||||
None) # DebugVisInfo
|
||||
else:
|
||||
log.info(f"(puzzle-step) action {action} ({Action(action)}) from state {self.puzzle.state} to {self.puzzle.tt[self.puzzle.state][action]}")
|
||||
reward, features, puzzle_just_finished = self.puzzle.transition(action)
|
||||
if puzzle_just_finished:
|
||||
self._completed_a_puzzle()
|
||||
return (features, reward, self.solved, None)
|
||||
|
||||
def render(self, env, brain):
|
||||
# renders a puzzlebox environment
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
actions = []
|
||||
rewards = []
|
||||
states = []
|
||||
brain.reset() # Warning!!: NOT MABE-reset(), but soft-reset() (keep weights)
|
||||
nextState = env.reset()
|
||||
states.append(nextState)
|
||||
actions.append(0) # path is recording actions in this visualization
|
||||
rewards.append(-1)
|
||||
time = 0
|
||||
print(env.puzzlesLeftToComplete)
|
||||
while True:
|
||||
time += 1
|
||||
brain.sensoryState = nextState # SET INPUTS
|
||||
brain.plasticUpdate()
|
||||
nextState, reward, goal_achieved, _ = env.step(brain.action) # GET OUTPUTS
|
||||
actions.append(brain.action)
|
||||
rewards.append(reward)
|
||||
states.append(nextState)
|
||||
if env.puzzlesLeftToComplete == 0 or time == 600: break
|
||||
#if goal_achieved or time == 100: break
|
||||
brain.reward = reward
|
||||
print(actions)
|
||||
print(states)
|
||||
plt.figure()
|
||||
plt.plot(actions)
|
||||
plt.scatter(list(range(len(actions))),actions)
|
||||
plt.figure()
|
||||
plt.plot(rewards)
|
||||
plt.scatter(list(range(len(rewards))),rewards)
|
||||
|
||||
def evaluate(self, ind,
|
||||
num_trials=200,
|
||||
n_actions=4,
|
||||
HARD_TIME_LIMIT=600):
|
||||
"""
|
||||
Given an individual agent's weights, evaluate it and
|
||||
return its fitness.
|
||||
"""
|
||||
w = 0.0
|
||||
|
||||
# Need to refactor the following code taken from the
|
||||
# Jupyter notebook.
|
||||
|
||||
# domain-specific settings
|
||||
#num_trials=200
|
||||
#n_actions = 4
|
||||
#(optimal lmbda in the agent is domain dependent - could be evolved)
|
||||
#HARD_TIME_LIMIT = 600
|
||||
#KILLED_REWARD = -10 # not used here
|
||||
#(standard reward) = -1.0 (means the agent is potentially wasting time - set internally in the agent code)
|
||||
#(goal reward) = 1.0 (means the agent achieved something good - set internally in the agent code)
|
||||
|
||||
# alpha # how much to weigh reward surprises that deviate from expectation
|
||||
# gamma # how important expected rewards will be
|
||||
# epsilon # fraction of exploration to exploitation (how often to choose a random action)
|
||||
# lmbda # how slowly memory of preceding actions fades away (1=never, 0=immediately)
|
||||
|
||||
agent = self.agentclass(obsSpace=self.observation_space, actSpace=self.action_space, alpha=self.alpha,
|
||||
gamma=self.gamma, epsilon=self.epsilon, lmbda=self.lmbda)
|
||||
|
||||
|
||||
# Put weights in the Agent
|
||||
agent.weights = [x for x in ind]
|
||||
|
||||
time_to_solve_each_trial = []
|
||||
rewards = []
|
||||
|
||||
for trialN in range(self.max_training_trials):
|
||||
# some output to see it running
|
||||
if (trialN % 10) == 0: print('.',end='')
|
||||
# initialize the agent, environment, and time for this trial
|
||||
agent.reset() # soft-reset() (keeps learned weights)
|
||||
nextState = self.reset()
|
||||
time = 0
|
||||
while True:
|
||||
time += 1
|
||||
# set agent senses based on environment and allow agent to determine an action
|
||||
agent.sensoryState = nextState
|
||||
agent.plasticUpdate()
|
||||
# determine effect on environment state & any reward (in standard openAI-gym API format)
|
||||
nextState, reward, goal_achieved, _ = self.step(agent.action)
|
||||
agent.reward = reward
|
||||
if self.puzzlesLeftToComplete == 0 or time == self.max_steps:
|
||||
agent.plasticUpdate()
|
||||
break
|
||||
# could have deadly rewards that stop the trial early
|
||||
#elif reward <= -10:
|
||||
# agent.sensoryState = nextState
|
||||
# agent.reward = reward
|
||||
# agent.plasticUpdate()
|
||||
# agent.reset()
|
||||
# nextState = self.reset()
|
||||
rewards.append(reward)
|
||||
time_to_solve_each_trial.append(time)
|
||||
|
||||
# Calculate fitness
|
||||
# Rewards are in [-1 .. 1], have to rescale to [0 .. 1]
|
||||
#scalerewards = (np.array(rewards) * 0.5) + 0.5
|
||||
#w = np.mean(scalerewards)
|
||||
w = sum(rewards)
|
||||
|
||||
return w,
|
||||
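# (The trailing comma matters: DEAP expects fitness values as tuples,
# even for a single-objective fitness.)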
|
||||
|
||||
def getObservationSpace(*items) -> Tuple[int]:
|
||||
'''Returns total features dimensions over all puzzles, starting from 0.
|
||||
Given 1 or more puzzles, finds the union of their observation (feature) spaces,
|
||||
then returns the size of that space.
|
||||
Ensures all puzzles have same feature dimensions, errors if not.
|
||||
Useful when setting up a RL state space for certain feature sizes.
|
||||
[3,1] would have dimensions [4,2], and [[0,2],[0,1]] would be [1,3]
|
||||
|
||||
>>> p1 = Puzzle(tt=[[]], rewards=[], features=[[0,1],[0,1],[3,1]])
|
||||
>>> getObservationSpace(p1)
|
||||
(4, 2)
|
||||
>>> p2 = Puzzle(tt=[[]], rewards=[], features=[[1,1],[1,1],[2,4]])
|
||||
>>> getObservationSpace(p2)
|
||||
(3, 5)
|
||||
>>> getObservationSpace(p1,p2)
|
||||
(4, 5)
|
||||
>>> puzzles = [p1,p2]
|
||||
>>> getObservationSpace(puzzles)
|
||||
(4, 5)
|
||||
'''
|
||||
if type(items) is tuple and isinstance(items[0], Puzzle):
|
||||
# perform union (max) over feature space of all items
|
||||
highest = copy.copy(items[0].features[0]) # features is [[int,int,...],...]
|
||||
featurelen = len(highest)
|
||||
for puzzle in items:
|
||||
for featureset in puzzle.features:
|
||||
if len(featureset) != featurelen:
|
||||
raise Exception("not all features have the same length")
|
||||
for feature_i in range(len(featureset)):
|
||||
highest[feature_i] = max(highest[feature_i],featureset[feature_i])
|
||||
return tuple((e+1 for e in highest)) # size is 1+highest due to 0-indexing of features
|
||||
elif type(items) is tuple and type(items[0]) in (tuple,list):
|
||||
return getObservationSpace(*items[0]) # unpack one layer
|
||||
else:
|
||||
raise Exception(f"Expected type of Puzzle(s), but got {type(items)}")
|
||||
|
||||
|
||||
def getActionSpace(*items) -> Tuple[int]:
|
||||
'''Returns total action dimensions over all puzzles, (num columns in tt).
|
||||
Given 1 or more puzzles.
|
||||
Ensures all puzzles have same dimensions, errors if not.
|
||||
Useful when setting up a RL state space for certain action sizes.
|
||||
|
||||
>>> p1 = Puzzle(tt=[[0,0],[4,2]], rewards=[], features=[[]])
|
||||
>>> getActionSpace(p1)
|
||||
(2,)
|
||||
>>> p2 = Puzzle(tt=[[0,0,1],[1,1,2]], rewards=[], features=[[]])
|
||||
>>> getActionSpace(p2)
|
||||
(3,)
|
||||
>>> getActionSpace(p1,p2)
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
Exception: not all puzzles (rows) have the same tt col size
|
||||
'''
|
||||
|
||||
if type(items) is tuple and isinstance(items[0], Puzzle):
|
||||
# perform union (max) over feature space of all items
|
||||
nrows, ncols = len(items[0].tt), len(items[0].tt[0])
|
||||
for puzzle in items:
|
||||
prows = len(puzzle.tt)
|
||||
if prows != nrows:
|
||||
raise Exception("not all puzzles have the same tt row size")
|
||||
samerows = [len(c) == ncols for c in puzzle.tt]
|
||||
if not all(samerows):
|
||||
raise Exception("not all puzzles (rows) have the same tt col size")
|
||||
return (ncols,)
|
||||
elif type(items) is tuple and type(items[0]) in (tuple,list):
|
||||
return getActionSpace(*items[0]) # unpack one layer
|
||||
else:
|
||||
raise Exception(f"Expected type of Puzzle(s), but got {type(items)}")
|
||||
|
||||
|
||||
def _test_world():
|
||||
'''full test of the conveyorbelt world
|
||||
|
||||
>>> import copy
|
||||
>>> maxrewards = [1]
|
||||
>>> easy_features = [[0,1],[0,1],[3,1],[0,0]]
|
||||
>>> easy_rewards = [-1,-1,-1,1]
|
||||
>>> easy_tt = np.array([[0,0,2,3], [0,0,0,0], [2,0,2,3], [3,3,3,3]])
|
||||
>>> p1 = Puzzle(tt=easy_tt, features=easy_features, rewards=easy_rewards)
|
||||
>>> p2 = copy.deepcopy(p1)
|
||||
>>> puzzles = (p1,p2)
|
||||
>>> world = ConvBelt(actionSpace = getActionSpace(puzzles), observationSpace = getObservationSpace(puzzles), maxRewards = maxrewards, agentclass = None, randomize = False)
|
||||
>>> world.append(p1)
|
||||
>>> world.append(p2)
|
||||
>>> # trial 1
|
||||
>>> world.reset() # reset before first use just to be sure
[0, 1]
|
||||
>>> world.step(Action('investigate'))
|
||||
([3, 1], -1, False, None)
|
||||
>>> world.step(Action('pass'))
|
||||
([0, 1], -1, False, None)
|
||||
>>> world.step(Action('eat'))
|
||||
([0, 0], 1, False, None)
|
||||
>>> world.step(Action('pass'))
|
||||
([3, 1], -1, False, None)
|
||||
>>> world.step(Action('eat'))
|
||||
([0, 0], 1, True, None)
|
||||
>>> world.step(Action('eat')) # try eating again, notice reward change
|
||||
([0, 0], -1, True, None)
|
||||
>>> # trial 2
|
||||
>>> world.reset()
[0, 1]
|
||||
>>> world.step(Action('investigate'))
|
||||
([3, 1], -1, False, None)
|
||||
>>> world.step(Action('pass'))
|
||||
([0, 1], -1, False, None)
|
||||
>>> world.step(Action('eat'))
|
||||
([0, 0], 1, False, None)
|
||||
>>> world.step(Action('pass'))
|
||||
([3, 1], -1, False, None)
|
||||
>>> world.step(Action('eat'))
|
||||
([0, 0], 1, True, None)
|
||||
'''
|
||||
|
||||
if __name__ == '__main__':
|
||||
'''test important functions and workflows with doctesting
|
||||
run this python file by itself to run these tests, and set
|
||||
LOGGING=True near top of file.'''
|
||||
import doctest
|
||||
from functools import partial
|
||||
test = partial(doctest.run_docstring_examples, globs = globals())
|
||||
test(getObservationSpace)
|
||||
test(getActionSpace)
|
||||
test(_test_world)
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
import random
|
||||
from deap import creator, base, tools, algorithms
|
||||
import numpy as np
|
||||
|
||||
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
|
||||
creator.create("Individual", np.ndarray, fitness=creator.FitnessMax)
|
||||
|
||||
toolbox = base.Toolbox()
|
||||
|
||||
#toolbox.register("attr_bool", random.randint, 0, 1) # non-numpy non-float version
|
||||
toolbox.register("attr_float", random.random)
|
||||
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, n=100)
|
||||
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
|
||||
|
||||
def linearFitness(individual):
|
||||
'''selection pressure for genome values to be numpy.arange(start=0.0, stop=1.0, step=1/len(genome))'''
|
||||
import numpy as np
|
||||
a = np.arange(0, 1, 1.0/len(individual))
|
||||
b = np.array(individual)
|
||||
return 1.0-np.sum(np.abs(a-b))/(len(individual)*0.5),
|
||||
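# Sanity check (illustrative): a perfectly linear genome scores the maximum,
#   linearFitness(np.arange(0.0, 1.0, 0.01))  ->  (1.0,)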
|
||||
def cxTwoPointCopy(ind1, ind2):
|
||||
"""Execute a two points crossover with copy on the input individuals. The
|
||||
copy is required because the slicing in numpy returns a view of the data,
|
||||
which leads to a self overwriting in the swap operation. It prevents
|
||||
::
|
||||
>>> import numpy as np
|
||||
>>> a = np.array((1,2,3,4))
|
||||
>>> b = np.array((5,6,7,8))
|
||||
>>> a[1:3], b[1:3] = b[1:3], a[1:3]
|
||||
>>> print(a)
|
||||
[1 6 7 4]
|
||||
>>> print(b)
|
||||
[5 6 7 8]
|
||||
"""
|
||||
size = len(ind1)
|
||||
cxpoint1 = random.randint(1, size)
|
||||
cxpoint2 = random.randint(1, size - 1)
|
||||
if cxpoint2 >= cxpoint1:
|
||||
cxpoint2 += 1
|
||||
else: # Swap the two cx points
|
||||
cxpoint1, cxpoint2 = cxpoint2, cxpoint1
|
||||
ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = ind2[cxpoint1:cxpoint2].copy(), ind1[cxpoint1:cxpoint2].copy()
|
||||
return ind1, ind2
|
||||
|
||||
toolbox.register("evaluate", linearFitness)
|
||||
#toolbox.register("mate", tools.cxTwoPoint) # non-numpy non-float version
|
||||
toolbox.register("mate", cxTwoPointCopy)
|
||||
#toolbox.register("mutate", tools.mutFlipBit, indpb=0.05) # non-numpy non-float version
|
||||
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=0.2, indpb=0.05)
|
||||
toolbox.register("select", tools.selTournament, tournsize=3)
|
||||
|
||||
# evolution loop
|
||||
population = toolbox.population(n=100)
|
||||
NGEN=500
|
||||
for gen in range(NGEN):
|
||||
offspring = algorithms.varAnd(population, toolbox, cxpb=0.5, mutpb=0.1)
|
||||
# constrain genome values to [0,1]
|
||||
for individual in offspring:
individual[:] = np.clip(individual, 0.0, 1.0) # np.clip returns a copy; write it back in place
|
||||
# Evaluate the individuals with an invalid fitness (not yet evaluated)
|
||||
invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
|
||||
fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
|
||||
for ind, fit in zip(invalid_ind, fitnesses):
|
||||
ind.fitness.values = fit
|
||||
population = toolbox.select(offspring, k=len(population))
|
||||
|
||||
# post-evolution analysis
|
||||
fitnesses = toolbox.map(toolbox.evaluate, population)
|
||||
sortedFitnesses = sorted(fitnesses)
|
||||
|
||||
bestFitness, worstFitness = sortedFitnesses[-1], sortedFitnesses[0] # sorted() is ascending
|
||||
print(bestFitness, worstFitness)
|
||||
|
||||
bestGenome = tools.selBest(population, k=1)
|
||||
print(bestGenome)
|
||||
|
|
@ -0,0 +1,333 @@
|
|||
"""
|
||||
exp1.py - instance of use of 'experiment.py'
|
||||
|
||||
Tasks:
|
||||
|
||||
- Consider how to have a changing schedule of stimulus presentation
|
||||
|
||||
Need to have something where we can see evolution producing a trait that
|
||||
would indicate interest in new things in the environment. Sets up conditions
|
||||
where curiosity could be advantageous.
|
||||
|
||||
Conveyor belt needs to have the ability to introduce new things.
|
||||
|
||||
Single factor shift to start -- color of the thing ?
|
||||
The introduction of novelty is the main thing, where the novelty is
|
||||
associated with fitness advantage.
|
||||
|
||||
Simple systems to test
|
||||
- constant environment
|
||||
- switch between two different environments
|
||||
- frequency of shift makes a difference
|
||||
- Goldilocks zone for intermediate frequency
|
||||
Controlled randomization
|
||||
- Known low-payoff 'food' in environment
|
||||
- Better thing has a cue
|
||||
- Changing frequency of presentation
|
||||
- Constant
|
||||
- Ramp
|
||||
- Cycle
|
||||
- 'Green' could indicate better but
|
||||
- x factor for better could be changed
|
||||
|
||||
For all of these, we can test unseen (novel) stimuli
|
||||
- Generalization can be tested
|
||||
- Cue of goodness
|
||||
- Proportion of time novel stimulus are rewarding
|
||||
- Must be a proportion to introduce unpredictability
|
||||
|
||||
One hypothesis: unpredictability between cues and rewards may lead to curiosity
|
||||
- Evolutionary timescale of unpredictability
|
||||
- Predictable lifetime
|
||||
|
||||
Push current code to repository.
|
||||
|
||||
|
||||
"""
|
||||
|
||||
|
||||
import sys
|
||||
# allow importing from the 'code/' dir
|
||||
sys.path.append("../code")
|
||||
|
||||
import os
|
||||
import platform
|
||||
import pickle
|
||||
import json
|
||||
import traceback
|
||||
import datetime
|
||||
import copy
|
||||
|
||||
import numpy as np # , itertools, copy
|
||||
import matplotlib.pyplot as plt
|
||||
from collections import defaultdict
|
||||
import importlib # module reloading
|
||||
|
||||
import environments
|
||||
import agents
|
||||
|
||||
# always forces a reload in case you have edited environments or agents
|
||||
importlib.reload(environments)
|
||||
importlib.reload(agents)
|
||||
#from environments.gridworld import GridWorld
|
||||
import environments.puzzle as pz
|
||||
from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace
|
||||
|
||||
from agents.q_agent import EvolvableAgent as Agent
|
||||
|
||||
# DEAP imports
|
||||
|
||||
import random
|
||||
from deap import creator, base, tools, algorithms
|
||||
|
||||
import multiprocessing
|
||||
|
||||
#pool = multiprocessing.Pool()
|
||||
#toolbox.register("map", pool.map)
|
||||
|
||||
# Weight handling
|
||||
from mda import MultiDimArray
|
||||
|
||||
# RESS
|
||||
from ress import RESS
|
||||
|
||||
# EvolveWeights
|
||||
# from ew import EvolveWeights
|
||||
from curio_evolve_weights import EvolveWeights
|
||||
|
||||
# Experiment
|
||||
from experiment import Experiment
|
||||
|
||||
def isotime():
|
||||
return datetime.datetime.now().isoformat()
|
||||
|
||||
def t2fn(timestamp):
|
||||
timestamp = timestamp.replace('.','_')
|
||||
timestamp = timestamp.replace(':','_')
|
||||
return timestamp
|
||||
|
||||
class Holder(object):
|
||||
"""
|
||||
A general class for the equivalent of a digital duffle bag, each instance
|
||||
can have essentially whatever you want stuffed into it.
|
||||
|
||||
This is essentially the very opposite of defining classes with the
|
||||
__slots__ convention, leaving the contents entirely open.
|
||||
|
||||
I've found this useful for making context objects. If I am careful,
|
||||
the whole object can be serialized to disk and loaded later.
|
||||
"""
|
||||
def __init__(self):
|
||||
pass
|
||||
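# Hedged usage sketch (names are illustrative; nothing below depends on this):
#
#   ctx = Holder()
#   ctx.trialN, ctx.notes = 3, "baseline run"
#   with open("ctx.pkl", "wb") as f:
#       pickle.dump(ctx, f)            # the whole duffle bag to disk
#   with open("ctx.pkl", "rb") as f:
#       restored = pickle.load(f)      # restored.trialN == 3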
|
||||
"""
|
||||
Probability of reward at all
|
||||
Probability of strength of reward
|
||||
|
||||
Variances:
|
||||
- How many puzzle cues do we have?
|
||||
- How often does a puzzle appear in training?
|
||||
- How often does a puzzle appear across evolutionary time?
|
||||
- How much reward does solving a puzzle deliver?
|
||||
|
||||
Two things , green | red
|
||||
green good
|
||||
red bad
|
||||
|
||||
Outcomes
|
||||
- Too unlikely -> no behavior to examine
|
||||
- Entirely predictable
|
||||
- In between -> curiosity has advantage
|
||||
|
||||
First sample from uniform distribution to determine reward (0.5)
|
||||
Second: strength of reward in conjunction with probability of reward (small frequency but large reward, etc.)
|
||||
|
||||
Spot or range where it becomes advantageous to evolve a curiosity module...
|
||||
|
||||
|
||||
Figuring out a representation that allows all the flexibility we discussed...
|
||||
|
||||
"puzzles": [
|
||||
{
|
||||
"puzzle_description": "Appetitive puzzle",
|
||||
"tt": [[0,0,2], # state 0: first presentation
|
||||
[0,0,0], # state 1: getting passed over (placeholder)
|
||||
[2,2,2]], # state 2: consumed (saturating)
|
||||
"features": [[2], # state 0: Green
|
||||
[2], # state 1: Green (placeholder)
|
||||
[0]], # state 2: Empty/Unknown (after being eaten)
|
||||
"rewards": [
|
||||
[-1, # state 0: first look
|
||||
-1, # state 1: proceeding to next puzzle (placeholder)
|
||||
1, # state 2: consume (reward)
|
||||
0.5 # Proportion
|
||||
],
|
||||
[-1, # state 0: first look
|
||||
-1, # state 1: proceeding to next puzzle (placeholder)
|
||||
-1, # state 2: consume (punishment)
|
||||
0.5 # Proportion
|
||||
],
|
||||
]
|
||||
},
|
||||
{
|
||||
"puzzle_description": "Aversive puzzle",
|
||||
"tt": [[0,0,2], # state 0: first presentation
|
||||
[0,0,0], # state 1: getting passed over (placeholder)
|
||||
[2,2,2]], # state 2: consumed (saturating)],
|
||||
"features": [[1], # state 0: Red
|
||||
[1], # state 1: Red (placeholder)
|
||||
[0]], # state 2: Empty/Unknown (after being eaten)
|
||||
"rewards": [-1, # state 0: first look
|
||||
-1, # state 1: proceeding to next puzzle (placeholder)
|
||||
-2], # state 2: consume (punishment)
|
||||
},
|
||||
|
||||
|
||||
"""
|
||||
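# Hedged sketch of one plausible reading of the proportioned "rewards" spec
# above: each candidate rewards list carries a trailing selection proportion,
# and one list is drawn per puzzle instantiation. Hypothetical helper; nothing
# below calls it yet.
def _sample_reward_vector(reward_specs):
    '''Draw one rewards list from [[r0, r1, r2, proportion], ...] by weight.'''
    proportions = [spec[-1] for spec in reward_specs]
    chosen = random.choices(reward_specs, weights=proportions, k=1)[0]
    return chosen[:-1]  # drop the trailing proportion entry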
|
||||
def exp1_environment(*args, **kwargs):
|
||||
|
||||
unambiguous_puzzle_spec = {
|
||||
"puzzle_set_description": "Unambiguous puzzle set with 1 good, 1 bad puzzle",
|
||||
"puzzles": [
|
||||
{
|
||||
"puzzle_description": "Appetitive puzzle",
|
||||
"tt": [[0,0,2], # state 0: first presentation
|
||||
[0,0,0], # state 1: getting passed over (placeholder)
|
||||
[2,2,2]], # state 2: consumed (saturating)
|
||||
"features": [[2], # state 0: Green
|
||||
[2], # state 1: Green (placeholder)
|
||||
[0]], # state 2: Empty/Unknown (after being eaten)
|
||||
"rewards": [-1, # state 0: first look
|
||||
-1, # state 1: proceeding to next puzzle (placeholder)
|
||||
1], # state 2: consume (reward)
|
||||
},
|
||||
{
|
||||
"puzzle_description": "Aversive puzzle",
|
||||
"tt": [[0,0,2], # state 0: first presentation
|
||||
[0,0,0], # state 1: getting passed over (placeholder)
|
||||
[2,2,2]], # state 2: consumed (saturating)],
|
||||
"features": [[1], # state 0: Red
|
||||
[1], # state 1: Red (placeholder)
|
||||
[0]], # state 2: Empty/Unknown (after being eaten)
|
||||
"rewards": [-1, # state 0: first look
|
||||
-1, # state 1: proceeding to next puzzle (placeholder)
|
||||
-2], # state 2: consume (punishment)
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
ambiguous_puzzle_spec = {
|
||||
"puzzle_set_description": "Ambiguous puzzle set with 1 good, 1 bad puzzle.",
|
||||
"puzzles": [
|
||||
{
|
||||
"puzzle_description": "Appetitive puzzle",
|
||||
"tt": [[0,0,2], # state 0: first presentation
|
||||
[0,0,0], # state 1: getting passed over (placeholder)
|
||||
[2,2,2]], # state 2: consumed (saturating)
|
||||
"features": [[1], # state 0: Red
|
||||
[1], # state 1: Red (placeholder)
|
||||
[0]], # state 2: Empty/Unknown (after being eaten)
|
||||
"rewards": [-1, # state 0: first look
|
||||
-1, # state 1: proceeding to next puzzle (placeholder)
|
||||
1], # state 2: consume (reward)
|
||||
},
|
||||
{
|
||||
"puzzle_description": "Aversive puzzle",
|
||||
"tt": [[0,0,2], # state 0: first presentation
|
||||
[0,0,0], # state 1: getting passed over (placeholder)
|
||||
[2,2,2]], # state 2: consumed (saturating)],
|
||||
"features": [[1], # state 0: Red
|
||||
[1], # state 1: Red (placeholder)
|
||||
[0]], # state 2: Empty/Unknown (after being eaten)
|
||||
"rewards": [-1, # state 0: first look
|
||||
-1, # state 1: proceeding to next puzzle (placeholder)
|
||||
-2], # state 2: consume (punishment)
|
||||
},
|
||||
]
|
||||
}
|
||||
# Notion: Have an object to define a schedule of presentation of
|
||||
# environments, with the ability to stochastically present one of
|
||||
# a list of environments.
|
||||
exp_schedule = {
|
||||
"setlist": [
|
||||
{
|
||||
"desc": "Initial puzzle set",
|
||||
"specs": [unambiguous_puzzle_spec],
|
||||
"turns": 50,
|
||||
"num_stimuli": 6,
|
||||
"sequence_type": "fixed",
|
||||
"probs": [[1.0], [1.0]]
|
||||
},
|
||||
{
|
||||
"desc": "Stochastic puzzle sets",
|
||||
"specs": [unambiguous_puzzle_spec, ambiguous_puzzle_spec],
|
||||
"turns": 200,
|
||||
"num_stimuli": 6,
|
||||
"sequence_type": "stochastic",
|
||||
"probs": [[1.0, 0.0], [0.0, 1.0]]
|
||||
},
|
||||
]
|
||||
}
|
||||
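# Hedged sketch (hypothetical helper, not wired in): one way to draw a puzzle
# spec for a schedule stage, assuming each row of "probs" is a selection-weight
# vector over that stage's "specs".
def _draw_spec(stage, row=0):
    return random.choices(stage["specs"], weights=stage["probs"][row], k=1)[0]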
|
||||
num_puzzles_on_belt = kwargs.get('num_puzzles_on_belt', 6)
|
||||
|
||||
pz = unambiguous_puzzle_spec
|
||||
if True: # placeholder guard left over from development
|
||||
maxrewards = [1]
|
||||
# Produce Gellermann sequence
|
||||
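# (A Gellermann series is a pseudorandom presentation order, constrained to
# balance alternatives and avoid long runs; standard in animal-learning work.)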
upress = RESS()
|
||||
print(dir(upress))
|
||||
print(pz['puzzles'])
|
||||
print(len(pz['puzzles']))
|
||||
upseries = upress.newress(num_puzzles_on_belt, len(pz['puzzles']))
|
||||
print("upseries", upseries)
|
||||
# Create puzzle sequence
|
||||
# Instantiate puzzles per Gellermann sequence
|
||||
puzzles = []
|
||||
for stimi in upseries:
|
||||
|
||||
stimn = int(stimi)
|
||||
myp = Puzzle(tt=np.array(pz['puzzles'][stimn]['tt']),
|
||||
features=pz['puzzles'][stimn]['features'],
|
||||
rewards=pz['puzzles'][stimn]['rewards']
|
||||
)
|
||||
puzzles.append(myp)
|
||||
# Create conveyor belt
|
||||
world = ConvBelt(actionSpace = getActionSpace(puzzles),
|
||||
observationSpace = getObservationSpace(puzzles),
|
||||
maxRewards = maxrewards,
|
||||
agentclass=Agent,
|
||||
randomize = False, alpha=0.005)
|
||||
# Add puzzles
|
||||
for pi in puzzles:
|
||||
world.append(pi)
|
||||
|
||||
return world
|
||||
|
||||
def do_experiment():
|
||||
# Experiment instance
|
||||
myexp = Experiment()
|
||||
myexp.set_agentclass(Agent)
|
||||
myexp.set_environclass(ConvBelt)
|
||||
myexp.set_evolverclass(EvolveWeights)
|
||||
myexp.set_evolver_attributes() # defaults
|
||||
myexp.set_environ_maker(exp1_environment) # sets function
|
||||
myexp.make_environ() # Calls function
|
||||
myexp.make_evolver_instance()
|
||||
if myexp.validate():
|
||||
myexp.evolver.driver()
|
||||
else:
|
||||
print("Experiment failed to validate.")
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
print("exp1.py start...")
|
||||
|
||||
|
||||
do_experiment()
|
||||
|
||||
print("exp1.py done.")
|
||||
|
|
@ -0,0 +1,185 @@
|
|||
"""
|
||||
experiment.py
|
||||
|
||||
Curiosity project Experiment class definition.
|
||||
|
||||
Aim for better encapsulation.
|
||||
|
||||
Experiment class
|
||||
- This class should get the various classes to use in running an experiment
|
||||
- EvolveWeights
|
||||
- mda?
|
||||
- Environ (GridWorld, ConvBelt, Puzzle)
|
||||
- Still is going to require ad hoc function to create the particular Environ
|
||||
- But could pass in function to use
|
||||
- Agentclass
|
||||
- And experimental attributes
|
||||
- For example
|
||||
- Experiment constructs EW instance, passes in weight length
|
||||
- Experiment constructs Environ instance
|
||||
- Experiment requests evolution run of EW with parameters
|
||||
- EW calls Experiment for each evaluation of an individual (and in what generation)
|
||||
- Experiment calls Environ.evaluate with individual weights, agentclass
|
||||
- Passes w, tuple back to EW
|
||||
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import traceback
|
||||
|
||||
class Holder(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
class Experiment(object):
|
||||
"""
|
||||
Experiment class. Instances will drive reinforcement learning experiments.
|
||||
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.agentclass = None
|
||||
self.environclass = None
|
||||
self.evolverclass = None
|
||||
self.environmaker = None
|
||||
pass
|
||||
|
||||
def validate(self):
|
||||
valid = True
|
||||
# Test that we have classes to use
|
||||
valid = valid and (self.agentclass is not None)
|
||||
valid = valid and (self.environclass is not None)
|
||||
valid = valid and (self.evolverclass is not None)
|
||||
# Test other values here
|
||||
return valid
|
||||
|
||||
def set_schedule(self, schedule):
|
||||
self.schedule = schedule
|
||||
|
||||
def set_environ_maker(self, environmaker):
|
||||
self.environmaker = environmaker
|
||||
|
||||
def make_environ(self):
|
||||
if self.environmaker is not None:
|
||||
try:
|
||||
self.environ = self.environmaker()
|
||||
except Exception:
|
||||
estr = f"Error: traceback.format_exc()"
|
||||
print(estr)
|
||||
self.environ = None
|
||||
|
||||
def set_agentclass(self, agentclass):
|
||||
# Test class for compatibility
|
||||
okclass = True
|
||||
# No test yet
|
||||
|
||||
if okclass:
|
||||
self.agentclass = agentclass
|
||||
|
||||
def get_agentclass(self):
|
||||
return self.agentclass
|
||||
|
||||
def set_environclass(self, environclass):
|
||||
# Test class for compatibility
|
||||
okclass = True
|
||||
|
||||
if not 'evaluate' in dir(environclass):
|
||||
okclass = False
|
||||
print("set_environclass error: class does not provide 'evaluate'")
|
||||
|
||||
if okclass:
|
||||
self.environclass = environclass
|
||||
|
||||
def get_environclass(self):
|
||||
return self.environclass
|
||||
|
||||
def set_evolverclass(self, evolverclass):
|
||||
# Test class for compatibility
|
||||
okclass = True
|
||||
|
||||
if not 'driver' in dir(evolverclass):
|
||||
okclass = False
|
||||
print("set_evolverclass error: class does not provide 'driver'")
|
||||
|
||||
if okclass:
|
||||
self.evolverclass = evolverclass
|
||||
|
||||
def set_agent_attributes(self, alpha=0.005):
|
||||
self.agent_props = Holder()
|
||||
self.agent_props.alpha = alpha
|
||||
|
||||
def set_evolver_attributes(self,
|
||||
popsize=100,
|
||||
maxgenerations=10000,
|
||||
cxpb=0.5,
|
||||
mtpb=0.05,
|
||||
wmin=-20.0,
|
||||
wmax=20.0,
|
||||
mut_center=0.0,
|
||||
mut_sigma=0.1,
|
||||
mut_indpb=0.05,
|
||||
tournsize=5,
|
||||
tournk=2,
|
||||
normalize_fitness=True,
|
||||
tag='environ'
|
||||
):
|
||||
self.evolver_props = Holder()
|
||||
self.evolver_props.popsize = popsize
|
||||
self.evolver_props.maxgenerations = maxgenerations
|
||||
self.evolver_props.cxpb = cxpb
|
||||
self.evolver_props.mtpb = mtpb
|
||||
self.evolver_props.wmin = wmin
|
||||
self.evolver_props.wmax = wmax
|
||||
self.evolver_props.mut_center = mut_center
|
||||
self.evolver_props.mut_sigma = mut_sigma
|
||||
self.evolver_props.mut_indpb = mut_indpb
|
||||
self.evolver_props.tournsize = tournsize
|
||||
self.evolver_props.tournk = tournk
|
||||
self.evolver_props.normalize_fitness = normalize_fitness
|
||||
self.evolver_props.tag = tag
|
||||
|
||||
def make_evolver_instance(self):
|
||||
self.evolver = self.evolverclass(
|
||||
self.environclass,
|
||||
popsize=self.evolver_props.popsize,
|
||||
maxgenerations=self.evolver_props.maxgenerations,
|
||||
cxpb=self.evolver_props.cxpb,
|
||||
mtpb=self.evolver_props.mtpb,
|
||||
wmin=self.evolver_props.wmin,
|
||||
wmax=self.evolver_props.wmax,
|
||||
mut_center= self.evolver_props.mut_center,
|
||||
mut_sigma= self.evolver_props.mut_sigma,
|
||||
mut_indpb= self.evolver_props.mut_indpb,
|
||||
tournsize= self.evolver_props.tournsize,
|
||||
tournk= self.evolver_props.tournk,
|
||||
normalize_fitness= self.evolver_props.normalize_fitness,
|
||||
tag= self.evolver_props.tag
|
||||
)
|
||||
|
||||
def set_env_attributes(self):
|
||||
self.env_props = Holder()
|
||||
|
||||
def handle_evaluation(self, ind, generation):
|
||||
"""
|
||||
evolver calls this to get an evaluation of an
|
||||
individual.
|
||||
|
||||
Depending on the experiment schedule and generation,
|
||||
this may require constructing a new environment.
|
||||
"""
|
||||
# Minimal completion: schedule-aware environment rebuilding per generation is
# still TODO (see docstring); for now, evaluate against the current environment.
return self.environ.evaluate(ind)
|
||||
|
||||
def run_experiment(self):
|
||||
"""
|
||||
# Run experiment
|
||||
ew = EvolveWeights(world,
|
||||
popsize=100,
|
||||
maxgenerations=1000,
|
||||
tournsize=75,
|
||||
tournk=3,
|
||||
normalize_fitness=False)
|
||||
ew.driver()
|
||||
"""
|
||||
|
||||
|
|
@ -0,0 +1,438 @@
|
|||
"""
|
||||
gwe.py -- GridWorld Evolving
|
||||
|
||||
Bringing together an Agent acting in GridWorld with
|
||||
DEAP evolutionary computation.
|
||||
|
||||
Notion: Set up for being able to call an Agent with
|
||||
a provided set of weights and run their training in
|
||||
a Gridworld environment. DEAP keeps a population of
|
||||
weights and handles the evolutionary computation.
|
||||
Save the best instantiated Agent per each generation
|
||||
for later review and analysis.
|
||||
"""
|
||||
import sys
|
||||
# allow importing from the 'code/' dir
|
||||
sys.path.append("../code")
|
||||
|
||||
import os
|
||||
import platform
|
||||
import pickle
|
||||
import json
|
||||
import traceback
|
||||
import datetime
|
||||
|
||||
import numpy as np, itertools, copy
|
||||
import matplotlib.pyplot as plt
|
||||
from collections import defaultdict
|
||||
import importlib # module reloading
|
||||
|
||||
import environments
|
||||
import agents
|
||||
|
||||
# always forces a reload in case you have edited environments or agents
|
||||
importlib.reload(environments)
|
||||
importlib.reload(agents)
|
||||
from environments.gridworld import GridWorld
|
||||
from agents.q_agent import EvolvableAgent as Agent
|
||||
|
||||
# DEAP imports
|
||||
|
||||
import random
|
||||
from deap import creator, base, tools, algorithms
|
||||
|
||||
import multiprocessing
|
||||
|
||||
#pool = multiprocessing.Pool()
|
||||
#toolbox.register("map", pool.map)
|
||||
|
||||
# Weight handling
|
||||
from mda import MultiDimArray
|
||||
|
||||
def isotime():
|
||||
return datetime.datetime.now().isoformat()
|
||||
|
||||
def t2fn(timestamp):
|
||||
timestamp = timestamp.replace('.','_')
|
||||
timestamp = timestamp.replace(':','_')
|
||||
return timestamp
|
||||
|
||||
class Holder(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
class GoalsAndHolesWorld(object):
|
||||
"""
|
||||
Class for making and using a 2D GridWorld based on
|
||||
setting goals and holes (hazards) for an RL Agent
|
||||
to explore.
|
||||
|
||||
"""
|
||||
def __init__(self, obsSpace, actSpace, goals, holes, startstate, agentclass,
|
||||
killed_reward=-10.0, max_training_trials=50, max_steps=32,
|
||||
alpha=0.01, gamma=0.95, epsilon=0.01, lmbda=0.42
|
||||
):
|
||||
self.obsSpace = tuple(obsSpace)
|
||||
self.actSpace = tuple(actSpace)
|
||||
self.goals = list(goals)
|
||||
self.holes = tuple(holes)
|
||||
self.startState = tuple(startstate)
|
||||
self.agentclass = agentclass
|
||||
self.killed_reward = killed_reward
|
||||
self.max_training_trials = max_training_trials
|
||||
self.max_steps = max_steps
|
||||
self.alpha = alpha
|
||||
self.gamma = gamma
|
||||
self.epsilon = epsilon
|
||||
self.lmbda = lmbda
|
||||
self.env = self.make_env(self.startState, self.obsSpace, self.goals, self.holes)
|
||||
print("Goals from env", self.env.goals)
|
||||
pass
|
||||
|
||||
def get_weights_len(self):
|
||||
mywl = np.prod(tuple(self.obsSpace) + tuple(self.actSpace))
|
||||
return mywl
|
||||
|
||||
def make_env(self, startstate=None, dims=None, goals=None, holes=None):
|
||||
if startstate in [None]:
|
||||
startstate = self.startState
|
||||
if dims in [None]:
|
||||
dims = self.obsSpace
|
||||
if goals in [None]:
|
||||
goals = list(self.goals)
|
||||
if holes in [None]:
|
||||
holes = self.holes
|
||||
print(startstate, dims, goals, holes)
|
||||
myenv = GridWorld(dims = dims, startState = startstate)
|
||||
myenv.goals.append(goals)
|
||||
for ii in range(holes[0][0], holes[0][1]+1):
|
||||
for jj in range(holes[1][0], holes[1][1]+1):
|
||||
print("adding hole at ", ii, jj)
|
||||
myenv.holes.append([ii,jj])
|
||||
return myenv
|
||||
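# Geometry note: holes=[[3,3],[1,10]] fills row 3, columns 1 through 10, with
# hazards, leaving (3,0) and (3,11) open. Those are exactly the start and goal
# used in holes_block_direct_route() below.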
|
||||
def run_trial(self, agent, env=None):
|
||||
if env in [None]:
|
||||
env = self.env
|
||||
agent.reset() # soft-reset() (keeps learned weights)
|
||||
nextState = env.reset()
|
||||
lastState = nextState
|
||||
runtime = 0
|
||||
while True:
|
||||
runtime += 1
|
||||
status = 'alive'
|
||||
# set agent senses based on environment and allow agent to determine an action
|
||||
agent.sensoryState = nextState
|
||||
agent.plasticUpdate()
|
||||
# determine effect on environment state & any reward (in standard openAI-gym API format)
|
||||
nextState, reward, goal_achieved, _ = env.step(agent.action)
|
||||
#if (tuple(lastState) == tuple(self.env.goals)) or (tuple(nextState) == tuple(self.env.goals)):
|
||||
# print(agent.action, lastState, reward, goal_achieved, nextState)
|
||||
lastState = nextState
|
||||
agent.reward = reward
|
||||
if goal_achieved or (runtime >= self.max_steps): break
|
||||
# stop trial if agent explicitly failed early
|
||||
elif reward <= self.killed_reward:
|
||||
agent.sensoryState = nextState
|
||||
agent.reward = reward
|
||||
agent.plasticUpdate() # allow 1 more update to 'learn' the bad reward
|
||||
agent.reset()
|
||||
nextState = env.reset()
|
||||
status = 'killed'
|
||||
runtime = self.max_steps
|
||||
break
|
||||
# print(time, agent.action, agent.reward, status)
|
||||
#print(" runtime", runtime)
|
||||
#if goal_achieved:
|
||||
# print(" Goal Achieved!!!")
|
||||
return agent, runtime
|
||||
|
||||
def evaluate(self, ind, return_agent=False):
|
||||
"""
|
||||
|
||||
"""
|
||||
latest = 20
|
||||
# Pull weights from ind
|
||||
# Instantiate an Agent
|
||||
myagent = Agent(obsSpace=self.obsSpace, actSpace=self.actSpace, alpha=self.alpha, gamma=self.gamma, epsilon=self.epsilon, lmbda=self.lmbda)
|
||||
# Put weights in the Agent
|
||||
myagent.weights = [x for x in ind]
|
||||
#print(" myagent.weights", myagent.weights)
|
||||
# run_trial calls
|
||||
time_to_solve_each_trial = [] # lower is better
|
||||
for trialN in range(self.max_training_trials):
|
||||
# some output to see it running
|
||||
# if (trialN % 10) == 0: print('.',end='')
|
||||
myagent, runtime = self.run_trial(myagent)
|
||||
# record trial results
|
||||
time_to_solve_each_trial.append(runtime)
|
||||
#print(" tts", time_to_solve_each_trial)
|
||||
# calculate fitness
|
||||
# Fitness is 1 - (avg. tts / max. time)
|
||||
# w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial) / self.max_steps))
|
||||
ltts = len(time_to_solve_each_trial)
|
||||
latest = ltts // 2
|
||||
# Latter half of steps
|
||||
#w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial[-latest:]) / self.max_steps))
|
||||
# First half of steps
|
||||
w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial[:-latest]) / self.max_steps))
|
||||
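# Worked example (illustrative): with max_steps=200 and a mean solve time of
# 50 steps over the scored trials, fitness = max(0, 1 - 50/200) = 0.75.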
# return the fitness
|
||||
#print(" fitness", "%3.2f" % w)
|
||||
#print(" myagent.weights after", myagent.weights)
|
||||
if return_agent:
|
||||
return myagent, w, time_to_solve_each_trial
|
||||
else:
|
||||
return w,
|
||||
|
||||
|
||||
class MaxAve(object):
|
||||
def __init__(self, alpha=0.1):
|
||||
self.alpha = alpha
|
||||
pass
|
||||
|
||||
def get_weights_len(self, wl=100):
|
||||
return wl
|
||||
|
||||
def evaluate(self, ind):
|
||||
npwts = np.array([x for x in ind])
|
||||
wtmax = np.max(np.abs(npwts))
|
||||
wtmean = np.mean(np.abs(npwts))
|
||||
if 0.0 != wtmax:
|
||||
w = wtmean / wtmax
|
||||
else:
|
||||
w = 0.0
|
||||
return w,
|
||||
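# e.g. a genome with uniform magnitudes such as [0.3, -0.3, 0.3] scores 1.0,
# while a single large outlier drives mean/max (and thus fitness) toward 0.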
|
||||
class EvolveWeights(object):
|
||||
"""
|
||||
Class to apply DEAP to evolve a population consisting of a set
|
||||
of weights.
|
||||
"""
|
||||
|
||||
def __init__(self, gahw,
|
||||
popsize=100, maxgenerations=10000,
|
||||
cxpb=0.5, mtpb=0.05,
|
||||
wmin=-20.0, wmax=20.0,
|
||||
mut_center=0.0, mut_sigma=0.1, mut_indpb=0.05,
|
||||
tournsize=5,
|
||||
tournk=2,
|
||||
normalize_fitness=True,
|
||||
tag='gahw'
|
||||
):
|
||||
self.tag = tag
|
||||
self.starttime = isotime()
|
||||
self.logbase = tag + "_" + t2fn(self.starttime)
|
||||
|
||||
self.gahw = gahw
|
||||
self.weights_len = gahw.get_weights_len()
|
||||
|
||||
self.popsize = popsize
|
||||
self.maxgenerations = maxgenerations
|
||||
self.cxpb = cxpb
|
||||
self.mtpb = mtpb
|
||||
self.wmin = wmin
|
||||
self.wmax = wmax
|
||||
self.mut_center = mut_center
|
||||
self.mut_sigma = mut_sigma
|
||||
self.mut_indpb = mut_indpb
|
||||
self.tournsize = tournsize
|
||||
self.tournk = tournk
|
||||
self.normalize_fitness = normalize_fitness
|
||||
pass
|
||||
|
||||
def masv(self, pop):
|
||||
mav = []
|
||||
maxs = []
|
||||
for ind in pop:
|
||||
wts = [x for x in ind]
|
||||
mav.append(np.mean(np.abs(wts)))
|
||||
maxs.append(np.max(np.abs(wts)))
|
||||
allmax = np.max(maxs)
|
||||
mymasv = [x/allmax for x in mav]
|
||||
return mymasv
|
||||
|
||||
def cxTwoPointCopy(self, ind1, ind2):
|
||||
"""Execute a two points crossover with copy on the input individuals. The
|
||||
copy is required because the slicing in numpy returns a view of the data,
|
||||
which leads to a self overwriting in the swap operation. It prevents
|
||||
::
|
||||
>>> import numpy as np
|
||||
>>> a = np.array((1,2,3,4))
|
||||
>>> b = np.array((5,6,7,8))
|
||||
>>> a[1:3], b[1:3] = b[1:3], a[1:3]
|
||||
>>> print(a)
|
||||
[1 6 7 4]
|
||||
>>> print(b)
|
||||
[5 6 7 8]
|
||||
"""
|
||||
size = len(ind1)
|
||||
cxpoint1 = random.randint(1, size)
|
||||
cxpoint2 = random.randint(1, size - 1)
|
||||
if cxpoint2 >= cxpoint1:
|
||||
cxpoint2 += 1
|
||||
else: # Swap the two cx points
|
||||
cxpoint1, cxpoint2 = cxpoint2, cxpoint1
|
||||
ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = ind2[cxpoint1:cxpoint2].copy(), ind1[cxpoint1:cxpoint2].copy()
|
||||
return ind1, ind2
|
||||
|
||||
def zero(self):
|
||||
return 0.0
|
||||
|
||||
def smallrandom(self, eps=None):
|
||||
"""
|
||||
Produce a small random number in [-eps .. eps].
|
||||
|
||||
A random variate in [-1 .. 1] is produced then
|
||||
multiplied by eps, so the final range is in [-eps .. eps].
|
||||
|
||||
"""
|
||||
if eps in [None]:
|
||||
eps = self.gahw.alpha
|
||||
rv = ((2.0 * random.random()) - 1.0) * eps
|
||||
return rv
|
||||
|
||||
def setup(self):
|
||||
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
|
||||
creator.create("Individual", np.ndarray, fitness=creator.FitnessMax)
|
||||
|
||||
self.toolbox = base.Toolbox()
|
||||
|
||||
self.pool = multiprocessing.Pool()
|
||||
self.toolbox.register("map", self.pool.map)
|
||||
|
||||
#toolbox.register("attr_bool", random.randint, 0, 1) # non-numpy non-float version
|
||||
# self.toolbox.register("attr_float", random.random)
|
||||
#self.toolbox.register("attr_float", self.zero)
|
||||
self.toolbox.register("attr_float", self.smallrandom)
|
||||
|
||||
self.toolbox.register("individual", tools.initRepeat, creator.Individual, self.toolbox.attr_float, n=self.weights_len)
|
||||
self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
|
||||
|
||||
self.toolbox.register("evaluate", self.gahw.evaluate)
|
||||
#toolbox.register("mate", tools.cxTwoPoint) # non-numpy non-float version
|
||||
self.toolbox.register("mate", self.cxTwoPointCopy)
|
||||
#toolbox.register("mutate", tools.mutFlipBit, indpb=0.05) # non-numpy non-float version
|
||||
self.toolbox.register("mutate", tools.mutGaussian, mu=self.mut_center, sigma=self.mut_sigma, indpb=self.mut_indpb)
|
||||
self.toolbox.register("select", tools.selTournament, tournsize=self.tournsize, k=self.tournk)
|
||||
|
||||
def normalize_fitnesses(self, fitnesses):
|
||||
#print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
|
||||
maxfitness = np.max([x[0] for x in fitnesses])
|
||||
#print("maxfitness", maxfitness)
|
||||
listfit = [x[0] for x in fitnesses]
|
||||
#print("listfit", listfit)
|
||||
normfit = [x/maxfitness if maxfitness != 0.0 else 0.0 for x in listfit] # guard against all-zero fitness
|
||||
#print("normfit", normfit)
|
||||
fitnesses = [tuple([x]) for x in normfit]
|
||||
#print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses])
|
||||
return fitnesses
|
||||
|
||||
def log_it(self, generation):
|
||||
pool = self.pool
|
||||
toolbox = self.toolbox
|
||||
self.pool = None
|
||||
self.toolbox = None
|
||||
pklfn = f"{self.logbase}__{generation+1}-{self.maxgenerations}.pkl"
|
||||
pickle.dump(self, open(pklfn, "wb"))
|
||||
self.pool = pool
|
||||
self.toolbox = toolbox
|
||||
|
||||
def loop(self):
|
||||
self.population = self.toolbox.population(n=self.popsize)
|
||||
#print(self.masv(self.population))
|
||||
NGEN=self.maxgenerations
|
||||
for gen in range(NGEN):
|
||||
print("generation", gen)
|
||||
offspring = algorithms.varAnd(self.population, self.toolbox, cxpb=self.cxpb, mutpb=self.mtpb)
|
||||
# print("offspring", offspring)
|
||||
# constrain genome values to [wmin, wmax]
for individual in offspring:
individual[:] = np.clip(individual, self.wmin, self.wmax) # np.clip returns a copy; write it back
|
||||
# print("clipped offspring", offspring)
|
||||
# Evaluate the individuals with an invalid fitness (not yet evaluated)
|
||||
# print("check fitness.valid")
|
||||
invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
|
||||
# print("invalid_ind", len(invalid_ind))
|
||||
#print("setting fitness")
|
||||
fitnesses = self.toolbox.map(self.toolbox.evaluate, invalid_ind)
|
||||
if self.normalize_fitness:
|
||||
fitnesses = self.normalize_fitnesses(fitnesses)
|
||||
"""
|
||||
#print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
|
||||
maxfitness = np.max([x[0] for x in fitnesses])
|
||||
#print("maxfitness", maxfitness)
|
||||
listfit = [x[0] for x in fitnesses]
|
||||
#print("listfit", listfit)
|
||||
normfit = [x/maxfitness for x in listfit]
|
||||
#print("normfit", normfit)
|
||||
fitnesses = [tuple([x]) for x in normfit]
|
||||
#print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses])
|
||||
"""
|
||||
# print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
|
||||
self.fitness_dist(fitnesses)
|
||||
# print("update ind fitness")
|
||||
for ind, fit in zip(invalid_ind, fitnesses):
|
||||
ind.fitness.values = fit
|
||||
#print("selection")
|
||||
#print("offspring\n", self.masv(offspring))
|
||||
self.offspring = offspring
|
||||
self.population = self.toolbox.select(offspring, k=len(self.population))
|
||||
if 0 == gen % 100:
|
||||
self.log_it(gen)
|
||||
|
||||
#print("population after selection\n", self.masv(self.population))
|
||||
#print("Report for generation", gen)
|
||||
self.report()
|
||||
|
||||
def report(self):
|
||||
# post-evolution analysis
|
||||
fitnesses = self.toolbox.map(self.toolbox.evaluate, self.population)
|
||||
if self.normalize_fitness:
|
||||
fitnesses = self.normalize_fitnesses(fitnesses)
|
||||
self.fitnesses = fitnesses
|
||||
self.sortedFitnesses = sorted(fitnesses)
|
||||
self.sortedFitnesses.reverse()
|
||||
self.fitness_dist(fitnesses)
|
||||
|
||||
self.bestFitness, self.worstFitness = self.sortedFitnesses[0], self.sortedFitnesses[-1]
|
||||
print("best/worst w", self.bestFitness, self.worstFitness)
|
||||
|
||||
self.bestGenome = tools.selBest(self.population, k=1)
|
||||
# print(self.bestGenome)
|
||||
|
||||
def ffmt(self, value, fmt="%3.2f"):
|
||||
return fmt % value
|
||||
|
||||
def fitness_dist(self, fitnesses):
|
||||
listfit = [x[0] for x in fitnesses]
|
||||
pct05, pct25, pct50, pct75, pct95 = np.percentile(listfit, [5, 25, 50, 75, 95])
|
||||
print(f"fitness dist: {self.ffmt(np.min(listfit))} {self.ffmt(pct05)} {self.ffmt(pct25)} {self.ffmt(pct50)} {self.ffmt(pct75)} {self.ffmt(pct95)} {self.ffmt(np.max(listfit))}")
|
||||
|
||||
def driver(self):
|
||||
# Initialize
|
||||
self.setup()
|
||||
# Generation loop
|
||||
self.loop()
|
||||
# Report
|
||||
self.report()
|
||||
self.log_it(self.maxgenerations)
|
||||
print(self.masv(self.population))
|
||||
pass
|
||||
|
||||
def holes_block_direct_route():
|
||||
# GridWorld as in 'gridworld.ipynb'
|
||||
gahw = GoalsAndHolesWorld((4,12), (4,), (3,11), [[3,3],[1,10]], (3,0), Agent, max_steps=200)
|
||||
ew = EvolveWeights(gahw, popsize=100, maxgenerations=10000, tournsize=100, tournk=2, normalize_fitness=False)
|
||||
ew.driver()
|
||||
|
||||
def maxave():
|
||||
ma = MaxAve()
|
||||
ew = EvolveWeights(ma, popsize = 100, maxgenerations=100)
|
||||
ew.driver()
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
holes_block_direct_route()
|
||||
# maxave()
|
||||
|
||||
pass
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
import numpy as np
|
||||
from typing import Any, Union, List, Tuple
|
||||
|
||||
class MultiDimArray:
|
||||
"""
|
||||
A class to represent and manipulate multi-dimensional arrays.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
mdary : numpy.ndarray
|
||||
A multi-dimensional array containing the input data.
|
||||
shape : tuple
|
||||
The shape of the input multi-dimensional array.
|
||||
|
||||
Methods
|
||||
-------
|
||||
flatten(output_type="list") -> Union[List, Tuple, np.ndarray]:
|
||||
Returns the flattened version of the multi-dimensional array as a list, tuple, or Numpy array.
|
||||
|
||||
foldout(vector, output_type="list") -> Union[List, Tuple, np.ndarray]:
|
||||
Reshapes a 1D vector back into the original shape of the multi-dimensional array,
|
||||
and returns it as a list, tuple, or Numpy array.
|
||||
"""
|
||||
def __init__(self, mdary: Union[List, Tuple, np.ndarray]):
|
||||
self.mdary = np.array(mdary)
|
||||
self.shape = self.mdary.shape
|
||||
|
||||
def flatten(self, output_type: str = "list") -> Union[List, Tuple, np.ndarray]:
|
||||
"""
|
||||
Flatten the multi-dimensional array.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
output_type : str, optional
|
||||
The output type of the flattened array, either 'list', 'tuple', or 'numpy' (default is 'list').
|
||||
|
||||
Returns
|
||||
-------
|
||||
Union[List, Tuple, np.ndarray]
|
||||
The flattened version of the multi-dimensional array in the specified output
|
||||
"""
|
||||
flat_array = self.mdary.flatten()
|
||||
|
||||
if output_type == "list":
|
||||
return flat_array.tolist()
|
||||
elif output_type == "tuple":
|
||||
return tuple(flat_array)
|
||||
elif output_type == "numpy":
|
||||
return flat_array
|
||||
else:
|
||||
raise ValueError("Invalid output_type. Choose 'list', 'tuple', or 'numpy'")
|
||||
|
||||
def foldout(self, vector: Union[List, Tuple, np.ndarray], output_type: str = "list") -> Union[List, Tuple, np.ndarray]:
|
||||
if len(vector) != self.mdary.size:
|
||||
raise ValueError("The input vector must have the same length as the flattened form of the multi-dimensional array")
|
||||
|
||||
reshaped_array = np.reshape(vector, self.shape)
|
||||
|
||||
if output_type == "list":
|
||||
return reshaped_array.tolist()
|
||||
elif output_type == "tuple":
|
||||
return tuple(map(tuple, reshaped_array))
|
||||
elif output_type == "numpy":
|
||||
return reshaped_array
|
||||
else:
|
||||
raise ValueError("Invalid output_type. Choose 'list', 'tuple', or 'numpy'")
|
||||
|
||||
if __name__ == "__main__":
|
||||
"""
|
||||
Example usage:
|
||||
"""
|
||||
mda = MultiDimArray([[1, 2], [3, 4], [5,6]])
|
||||
#mda = MultiDimArray([1, 2, 3, 4, 5,6])
|
||||
print(f"Input array: {str(mda.mdary.tolist())}")
|
||||
flat = mda.flatten(output_type="list")
|
||||
print(f"Flattened array: {flat}")
|
||||
|
||||
# Fold the flattened array [1, 2, 3, 4, 5, 6] back into the original shape
|
||||
folded = mda.foldout(flat, output_type="list")
|
||||
print(f"Folded back array: {folded}")
|
||||
|
||||
"""
|
||||
The folded back array should be numerically identical to the original mdary:
|
||||
[[1, 2], [3, 4], [5, 6]]
|
||||
"""
|
||||
|
|
@ -0,0 +1,568 @@
|
|||
"""multigwe.py -- Multi GridWorlds Evolving
|
||||
|
||||
Bringing together an Agent acting in one of multiple GridWorlds with
|
||||
DEAP evolutionary computation.
|
||||
|
||||
Notion: Set up for being able to call an Agent with a provided set of
|
||||
weights and run their training in one of multiple Gridworld
|
||||
environments. DEAP keeps a population of weights and handles the
|
||||
evolutionary computation. Save the best instantiated Agent per each
|
||||
generation for later review and analysis.
|
||||
|
||||
"""
|
||||
import sys
|
||||
# allow importing from the 'code/' dir
|
||||
sys.path.append("../code")
|
||||
|
||||
import os
|
||||
import platform
|
||||
import pickle
|
||||
import json
|
||||
import traceback
|
||||
import datetime
|
||||
|
||||
import numpy as np, itertools, copy
|
||||
import matplotlib.pyplot as plt
|
||||
from collections import defaultdict
|
||||
import importlib # module reloading
|
||||
|
||||
import environments
|
||||
import agents
|
||||
|
||||
# always forces a reload in case you have edited environments or agents
|
||||
importlib.reload(environments)
|
||||
importlib.reload(agents)
|
||||
from environments.gridworld import GridWorld
|
||||
from agents.q_agent import EvolvableAgent as Agent
|
||||
|
||||
# DEAP imports
|
||||
|
||||
import random
|
||||
from deap import creator, base, tools, algorithms
|
||||
|
||||
import multiprocessing
|
||||
|
||||
#pool = multiprocessing.Pool()
|
||||
#toolbox.register("map", pool.map)
|
||||
|
||||
# Weight handling
|
||||
from mda import MultiDimArray
|
||||
|
||||
def isotime():
|
||||
return datetime.datetime.now().isoformat()
|
||||
|
||||
def t2fn(timestamp):
|
||||
timestamp = timestamp.replace('.','_')
|
||||
timestamp = timestamp.replace(':','_')
|
||||
return timestamp
|
||||
|
||||
class Holder(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
class GoalsAndHolesWorld(object):
|
||||
"""
|
||||
Class for making and using a 2D GridWorld based on
|
||||
setting goals and holes (hazards) for an RL Agent
|
||||
to explore.
|
||||
|
||||
Modifications for multiple maps...
|
||||
Need a 'maps' array
|
||||
|
||||
"""
|
||||
def __init__(self, obsSpace, actSpace, goals, holes, startstate, agentclass,
|
||||
killed_reward=-10.0, max_training_trials=50, max_steps=32,
|
||||
alpha=0.005, gamma=0.95, epsilon=0.01, lmbda=0.42
|
||||
):
|
||||
|
||||
self.maps = []
|
||||
|
||||
self.add_map(obsSpace, actSpace, goals, holes, startstate)
|
||||
# Instance now has the initial map in place
|
||||
|
||||
self.agentclass = agentclass
|
||||
self.killed_reward = killed_reward
|
||||
self.max_training_trials = max_training_trials
|
||||
self.max_steps = max_steps
|
||||
self.alpha = alpha
|
||||
self.gamma = gamma
|
||||
self.epsilon = epsilon
|
||||
self.lmbda = lmbda
|
||||
print("Goals from initial env", self.maps[0].env.goals)
|
||||
pass
|
||||
|
||||
def get_weights_len(self):
|
||||
mywl = np.prod(tuple(self.maps[0].obsSpace) + tuple(self.maps[0].actSpace))
|
||||
return mywl
|
||||
|
||||
def add_map(self, obsSpace, actSpace, goals, holes, startstate):
|
||||
mymap = Holder()
|
||||
mymap.obsSpace = tuple(obsSpace)
|
||||
mymap.actSpace = tuple(actSpace)
|
||||
mymap.goals = list(goals)
|
||||
mymap.holes = tuple(holes)
|
||||
mymap.startState = tuple(startstate)
|
||||
mymap.env = self.make_env(mymap.startState, mymap.obsSpace, mymap.goals, mymap.holes)
|
||||
self.maps.append(mymap)
|
||||
|
||||
def make_env(self, startstate=None, dims=None, goals=None, holes=None):
|
||||
# Default: the first map in the list.
|
||||
if startstate in [None] and 0 < len(self.maps):
|
||||
startstate = self.maps[0].startState
|
||||
if dims in [None] and 0 < len(self.maps):
|
||||
dims = self.maps[0].obsSpace
|
||||
if goals in [None] and 0 < len(self.maps):
|
||||
goals = list(self.maps[0].goals)
|
||||
if holes in [None] and 0 < len(self.maps):
|
||||
holes = self.maps[0].holes
|
||||
print(startstate, dims, goals, holes)
|
||||
myenv = GridWorld(dims = dims, startState = startstate)
|
||||
myenv.goals.append(goals)
|
||||
for ii in range(holes[0][0], holes[0][1]+1):
|
||||
for jj in range(holes[1][0], holes[1][1]+1):
|
||||
print("adding hole at ", ii, jj)
|
||||
myenv.holes.append([ii,jj])
|
||||
return myenv
|
||||
|
||||
def run_trial(self, agent, env=None):
|
||||
if env in [None]:
|
||||
# Choose an environment
|
||||
"""
|
||||
if 1 == len(self.maps):
|
||||
mymap = self.maps[0]
|
||||
else:
|
||||
mymap = random.choice(self.maps)
|
||||
"""
|
||||
mymap = self.choose_map()
|
||||
env = mymap.env
|
||||
|
||||
agent.reset() # soft-reset() (keeps learned weights)
|
||||
nextState = env.reset()
|
||||
lastState = nextState
|
||||
runtime = 0
|
||||
while True:
|
||||
runtime += 1
|
||||
status = 'alive'
|
||||
# set agent senses based on environment and allow agent to determine an action
|
||||
agent.sensoryState = nextState
|
||||
agent.plasticUpdate()
|
||||
# determine effect on environment state & any reward (in standard openAI-gym API format)
|
||||
nextState, reward, goal_achieved, _ = env.step(agent.action)
|
||||
|
||||
#if (tuple(lastState) == tuple(self.env.goals)) or (tuple(nextState) == tuple(self.env.goals)):
|
||||
# print(agent.action, lastState, reward, goal_achieved, nextState)
|
||||
lastState = nextState
|
||||
agent.reward = reward
|
||||
if goal_achieved or (runtime >= self.max_steps): break
# stop trial if agent explicitly failed early
elif reward <= self.killed_reward:
|
||||
agent.sensoryState = nextState
|
||||
agent.reward = reward
|
||||
agent.plasticUpdate() # allow 1 more update to 'learn' the bad reward
|
||||
agent.reset()
|
||||
nextState = env.reset()
|
||||
status = 'killed'
|
||||
runtime = self.max_steps
|
||||
break
|
||||
# print(time, agent.action, agent.reward, status)
|
||||
#print(" runtime", runtime)
|
||||
#if goal_achieved:
|
||||
# print(" Goal Achieved!!!")
|
||||
return agent, runtime
|
||||
|
||||
def choose_map(self, map_index=None):
|
||||
"""
|
||||
If map_index in [0..len(self.maps)], return that one.
|
||||
Else return one randomly.
|
||||
"""
|
||||
# print("self.maps", self.maps)
|
||||
|
||||
if map_index is None:
|
||||
# Random choice of map from alternatives
|
||||
if 1 == len(self.maps): # There can only be one
|
||||
mymap = self.maps[0]
|
||||
else: # Choose one of them
|
||||
mymap = random.choice(self.maps)
|
||||
elif 0 <= map_index and map_index < len(self.maps):
|
||||
mymap = self.maps[map_index]
|
||||
else:
|
||||
mymap = random.choice(self.maps)
|
||||
return mymap
|
||||
|
||||
def evaluate(self, ind, return_agent=False):
|
||||
"""
|
||||
|
||||
"""
|
||||
latest = 20
|
||||
# Pull weights from ind
|
||||
|
||||
# Choose an environment
mymap = self.choose_map()
|
||||
|
||||
myenv = mymap.env
|
||||
|
||||
# Instantiate an Agent
|
||||
myagent = Agent(obsSpace=mymap.obsSpace, actSpace=mymap.actSpace, alpha=self.alpha, gamma=self.gamma, epsilon=self.epsilon, lmbda=self.lmbda)
|
||||
|
||||
# TODO: consider one initial trial to measure performance due to
# inheritance alone, then full training trials to 'develop' the agent
# and measure its trained performance.
|
||||
|
||||
# Put weights in the Agent
|
||||
myagent.weights = [x for x in ind]
|
||||
#print(" myagent.weights", myagent.weights)
|
||||
# run_trial calls
|
||||
time_to_solve_each_trial = [] # lower is better
|
||||
for trialN in range(self.max_training_trials):
|
||||
# some output to see it running
|
||||
# if (trialN % 10) == 0: print('.',end='')
|
||||
myagent, runtime = self.run_trial(myagent, env=myenv)
|
||||
# record trial results
|
||||
time_to_solve_each_trial.append(runtime)
|
||||
#print(" tts", time_to_solve_each_trial)
|
||||
# calculate fitness: 1 - (mean time-to-solve / max time), floored at 0.0
ltts = len(time_to_solve_each_trial)
latest = ltts // 2
# score the first half of trials; use [-latest:] instead to score the latter half
w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial[:-latest]) / self.max_steps))
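# Worked example (hypothetical numbers): with max_steps = 32 and
# first-half solve times [32, 20, 12, 8], the mean is 18, so
# w = max(0.0, 1.0 - 18/32) = 0.4375.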
|
||||
# return the fitness
|
||||
#print(" fitness", "%3.2f" % w)
|
||||
#print(" myagent.weights after", myagent.weights)
|
||||
if return_agent:
|
||||
return myagent, w, time_to_solve_each_trial
|
||||
else:
|
||||
return w,
|
||||
|
||||
|
||||
def multi_evaluate(self, ind, return_agent=False):
|
||||
"""
|
||||
Like 'evaluate', but when multiple maps exist, evaluate per
|
||||
each map, collect performance, and return fitness as the
|
||||
mean performance across all maps.
|
||||
|
||||
"""
|
||||
latest = 20
|
||||
# Pull weights from ind
|
||||
|
||||
# Info across all maps/environments
|
||||
time_to_solve_each_trial = [] # lower is better
|
||||
for mymap in self.maps:
|
||||
myenv = mymap.env
|
||||
# Instantiate an Agent
|
||||
myagent = Agent(obsSpace=mymap.obsSpace, actSpace=mymap.actSpace, alpha=self.alpha, gamma=self.gamma, epsilon=self.epsilon, lmbda=self.lmbda)
|
||||
# Put weights in the Agent
|
||||
myagent.weights = [x for x in ind]
|
||||
#print(" myagent.weights", myagent.weights)
|
||||
# run_trial calls
|
||||
for trialN in range(self.max_training_trials):
|
||||
# some output to see it running
|
||||
# if (trialN % 10) == 0: print('.',end='')
|
||||
myagent, runtime = self.run_trial(myagent, env=myenv)
|
||||
# record trial results
|
||||
time_to_solve_each_trial.append(runtime)
|
||||
|
||||
# calculate fitness
|
||||
# Fitness is 1 - (avg. tts / max. time)
|
||||
w = max(0.0, 1.0 - (np.mean(time_to_solve_each_trial) / self.max_steps))
|
||||
# return the fitness
|
||||
if return_agent:
|
||||
return myagent, w, time_to_solve_each_trial
|
||||
else:
|
||||
return w,
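# Minimal usage sketch for GoalsAndHolesWorld (all coordinates below are
# made-up illustration values, not settings from the experiments here):
#
# gahw = GoalsAndHolesWorld((4,4), (4,), (3,3), [[1,1],[1,2]], (0,0),
# Agent, max_steps=50)
# wlen = gahw.get_weights_len() # 4*4*4 = 64 weights
# w, = gahw.evaluate([0.0] * wlen) # fitness of an all-zero genome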
|
||||
|
||||
class MaxAve(object):
"""
Toy fitness problem for smoke-testing the evolution loop: fitness is
the ratio of mean to max absolute weight, so uniform-magnitude
genomes score best.
"""
def __init__(self, alpha=0.1):
self.alpha = alpha
|
||||
|
||||
def get_weights_len(self, wl=100):
|
||||
return wl
|
||||
|
||||
def evaluate(self, ind):
|
||||
npwts = np.array([x for x in ind])
|
||||
wtmax = np.max(np.abs(npwts))
|
||||
wtmean = np.mean(np.abs(npwts))
|
||||
if 0.0 != wtmax:
|
||||
w = wtmean / wtmax
|
||||
else:
|
||||
w = 0.0
|
||||
return w,
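# Example (made-up weights): for ind = [1, -2, 4], mean|w| = 7/3 and
# max|w| = 4, so fitness = 7/12, about 0.583; an all-equal genome scores 1.0.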
|
||||
|
||||
class EvolveWeights(object):
|
||||
"""
|
||||
Class to apply DEAP to evolve a population consisting of a set
|
||||
of weights.
|
||||
"""
|
||||
|
||||
def __init__(self, gahw,
|
||||
popsize=100, maxgenerations=10000,
|
||||
cxpb=0.5, mtpb=0.05,
|
||||
wmin=-20.0, wmax=20.0,
|
||||
mut_center=0.0, mut_sigma=0.1, mut_indpb=0.05,
|
||||
tournsize=5,
|
||||
tournk=2,
|
||||
normalize_fitness=True,
|
||||
tag='gahw'
|
||||
):
|
||||
self.tag = tag
|
||||
self.starttime = isotime()
|
||||
self.logbase = tag + "_" + t2fn(self.starttime)
|
||||
|
||||
self.gahw = gahw
|
||||
self.weights_len = gahw.get_weights_len()
|
||||
|
||||
self.popsize = popsize
|
||||
self.maxgenerations = maxgenerations
|
||||
self.cxpb = cxpb
|
||||
self.mtpb = mtpb
|
||||
self.wmin = wmin
|
||||
self.wmax = wmax
|
||||
self.mut_center = mut_center
|
||||
self.mut_sigma = mut_sigma
|
||||
self.mut_indpb = mut_indpb
|
||||
self.tournsize = tournsize
|
||||
self.tournk = tournk
|
||||
self.normalize_fitness = normalize_fitness
|
||||
pass
|
||||
|
||||
def masv(self, pop):
|
||||
mav = []
|
||||
maxs = []
|
||||
for ind in pop:
|
||||
wts = [x for x in ind]
|
||||
mav.append(np.mean(np.abs(wts)))
|
||||
maxs.append(np.max(np.abs(wts)))
|
||||
allmax = np.max(maxs)
|
||||
mymasv = [x/allmax for x in mav]
|
||||
return mymasv
|
||||
|
||||
def cxTwoPointCopy(self, ind1, ind2):
|
||||
"""Execute a two points crossover with copy on the input individuals. The
|
||||
copy is required because the slicing in numpy returns a view of the data,
|
||||
which leads to a self overwriting in the swap operation. It prevents
|
||||
::
|
||||
>>> import numpy as np
|
||||
>>> a = np.array((1,2,3,4))
|
||||
>>> b = np.array((5,6,7,8))
|
||||
>>> a[1:3], b[1:3] = b[1:3], a[1:3]
|
||||
>>> print(a)
|
||||
[1 6 7 4]
|
||||
>>> print(b)
|
||||
[5 6 7 8]
|
||||
"""
|
||||
size = len(ind1)
|
||||
cxpoint1 = random.randint(1, size)
|
||||
cxpoint2 = random.randint(1, size - 1)
|
||||
if cxpoint2 >= cxpoint1:
|
||||
cxpoint2 += 1
|
||||
else: # Swap the two cx points
|
||||
cxpoint1, cxpoint2 = cxpoint2, cxpoint1
|
||||
ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = ind2[cxpoint1:cxpoint2].copy(), ind1[cxpoint1:cxpoint2].copy()
|
||||
return ind1, ind2
|
||||
|
||||
def zero(self):
|
||||
return 0.0
|
||||
|
||||
def smallrandom(self, eps=None):
|
||||
"""
|
||||
Produce a small random number in [-eps .. eps].
|
||||
|
||||
A random variate in [-1 .. 1] is produced then
|
||||
multiplied by eps, so the final range is in [-eps .. eps].
|
||||
|
||||
"""
|
||||
if eps is None:
|
||||
eps = self.gahw.alpha
|
||||
rv = ((2.0 * random.random()) - 1.0) * eps
|
||||
return rv
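# e.g. with eps left at None here, eps falls back to gahw.alpha
# (0.005 by default above), so each initial weight is drawn
# uniformly from [-0.005, 0.005].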
|
||||
|
||||
def setup(self):
|
||||
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
|
||||
creator.create("Individual", np.ndarray, fitness=creator.FitnessMax)
|
||||
|
||||
self.toolbox = base.Toolbox()
|
||||
|
||||
self.pool = multiprocessing.Pool()
|
||||
self.toolbox.register("map", self.pool.map)
|
||||
|
||||
#toolbox.register("attr_bool", random.randint, 0, 1) # non-numpy non-float version
|
||||
# self.toolbox.register("attr_float", random.random)
|
||||
#self.toolbox.register("attr_float", self.zero)
|
||||
self.toolbox.register("attr_float", self.smallrandom)
|
||||
|
||||
self.toolbox.register("individual", tools.initRepeat, creator.Individual, self.toolbox.attr_float, n=self.weights_len)
|
||||
self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
|
||||
|
||||
# self.toolbox.register("evaluate", self.gahw.evaluate)
|
||||
self.toolbox.register("evaluate", self.gahw.multi_evaluate)
|
||||
#toolbox.register("mate", tools.cxTwoPoint) # non-numpy non-float version
|
||||
self.toolbox.register("mate", self.cxTwoPointCopy)
|
||||
#toolbox.register("mutate", tools.mutFlipBit, indpb=0.05) # non-numpy non-float version
|
||||
self.toolbox.register("mutate", tools.mutGaussian, mu=self.mut_center, sigma=self.mut_sigma, indpb=self.mut_indpb)
|
||||
self.toolbox.register("select", tools.selTournament, tournsize=self.tournsize, k=self.tournk)
|
||||
|
||||
def normalize_fitnesses(self, fitnesses):
|
||||
#print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
|
||||
maxfitness = np.max([x[0] for x in fitnesses])
|
||||
#print("maxfitness", maxfitness)
|
||||
listfit = [x[0] for x in fitnesses]
|
||||
#print("listfit", listfit)
|
||||
normfit = [x/maxfitness for x in listfit]
|
||||
#print("normfit", normfit)
|
||||
fitnesses = [tuple([x]) for x in normfit]
|
||||
#print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses])
|
||||
return fitnesses
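# Example: [(0.25,), (0.5,)] has max fitness 0.5 and normalizes to
# [(0.5,), (1.0,)]; the best individual always maps to 1.0.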
|
||||
|
||||
def log_it(self, generation):
|
||||
pool = self.pool
|
||||
toolbox = self.toolbox
|
||||
self.pool = None
|
||||
self.toolbox = None
|
||||
pklfn = f"{self.logbase}__{generation+1}-{self.maxgenerations}.pkl"
|
||||
with open(pklfn, "wb") as pf:
pickle.dump(self, pf)
|
||||
self.pool = pool
|
||||
self.toolbox = toolbox
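# The pickle written above can be reloaded later for inspection (the
# filename below is illustrative, not from a real run):
# ew = pickle.load(open("gahw_2024-01-01T00_00_00__101-10000.pkl", "rb"))
# print(len(ew.population))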
|
||||
|
||||
def loop(self):
|
||||
self.population = self.toolbox.population(n=self.popsize)
|
||||
#print(self.masv(self.population))
|
||||
NGEN=self.maxgenerations
|
||||
for gen in range(NGEN):
|
||||
print("generation", gen)
|
||||
offspring = algorithms.varAnd(self.population, self.toolbox, cxpb=self.cxpb, mutpb=self.mtpb)
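# varAnd (above) clones the population, then applies crossover to
# consecutive pairs with probability cxpb and mutation to each clone
# with probability mtpb: DEAP's standard variation step.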
|
||||
# print("offspring", offspring)
|
||||
# constrain genome values to [wmin, wmax]; assign back in place,
# since np.clip alone returns a copy that would be discarded
for individual in offspring:
individual[:] = np.clip(individual, self.wmin, self.wmax)
|
||||
# print("clipped offspring", offspring)
|
||||
# Evaluate the individuals with an invalid fitness (not yet evaluated)
|
||||
# print("check fitness.valid")
|
||||
invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
|
||||
# print("invalid_ind", len(invalid_ind))
|
||||
#print("setting fitness")
|
||||
fitnesses = self.toolbox.map(self.toolbox.evaluate, invalid_ind)
|
||||
if self.normalize_fitness:
fitnesses = self.normalize_fitnesses(fitnesses)
|
||||
# print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
|
||||
self.fitness_dist(fitnesses)
|
||||
# print("update ind fitness")
|
||||
for ind, fit in zip(invalid_ind, fitnesses):
|
||||
ind.fitness.values = fit
|
||||
#print("selection")
|
||||
#print("offspring\n", self.masv(offspring))
|
||||
self.offspring = offspring
|
||||
self.population = self.toolbox.select(offspring, k=len(self.population))
|
||||
if 0 == gen % 100:
|
||||
self.log_it(gen)
|
||||
|
||||
#print("population after selection\n", self.masv(self.population))
|
||||
#print("Report for generation", gen)
|
||||
self.report()
|
||||
|
||||
def report(self):
|
||||
# post-evolution analysis
|
||||
fitnesses = self.toolbox.map(self.toolbox.evaluate, self.population)
|
||||
if self.normalize_fitness:
|
||||
fitnesses = self.normalize_fitnesses(fitnesses)
|
||||
self.fitnesses = fitnesses
|
||||
self.sortedFitnesses = sorted(fitnesses)
|
||||
self.sortedFitnesses.reverse()
|
||||
self.fitness_dist(fitnesses)
|
||||
|
||||
self.bestFitness, self.worstFitness = self.sortedFitnesses[0], self.sortedFitnesses[-1]
|
||||
print("best/worst w", self.bestFitness, self.worstFitness)
|
||||
|
||||
self.bestGenome = tools.selBest(self.population, k=1)
|
||||
# print(self.bestGenome)
|
||||
|
||||
def ffmt(self, value, fmt="%3.2f"):
|
||||
return fmt % value
|
||||
|
||||
def fitness_dist(self, fitnesses):
listfit = [x[0] for x in fitnesses]
# np.percentile expects percentages in [0, 100], not fractions
pct05, pct25, pct50, pct75, pct95 = np.percentile(listfit, [5, 25, 50, 75, 95])
print(f"fitness dist: {self.ffmt(np.min(listfit))} {self.ffmt(pct05)} {self.ffmt(pct25)} {self.ffmt(pct50)} {self.ffmt(pct75)} {self.ffmt(pct95)} {self.ffmt(np.max(listfit))}")
|
||||
|
||||
def driver(self):
|
||||
# Initialize
|
||||
self.setup()
|
||||
# Generation loop
|
||||
self.loop()
|
||||
# Report
|
||||
self.report()
|
||||
self.log_it(self.maxgenerations)
|
||||
print(self.masv(self.population))
|
||||
pass
|
||||
|
||||
def holes_block_direct_route():
|
||||
# GridWorld as in 'gridworld.ipynb'
|
||||
gahw = GoalsAndHolesWorld((4,12), (4,), (3,11), [[3,3],[1,10]], (3,0), Agent, max_steps=200)
|
||||
ew = EvolveWeights(gahw, popsize=100, maxgenerations=10000, tournsize=75, tournk=3, normalize_fitness=False)
|
||||
ew.driver()
|
||||
|
||||
|
||||
def holes_block_direct_route_two_goals():
|
||||
# GridWorld as in 'gridworld.ipynb'
|
||||
gahw = GoalsAndHolesWorld((4,13), (4,), (3,12), [[3,3],[1,11]], (2,6), Agent, max_steps=200)
|
||||
gahw.add_map((4,13), (4,), (3,0), [[3,3],[1,11]], (2,6))
|
||||
ew = EvolveWeights(gahw, popsize=100, maxgenerations=100, tournsize=75, tournk=3, normalize_fitness=False)
|
||||
ew.driver()
|
||||
|
||||
|
||||
def holes_block_direct_route_two_goals_left():
|
||||
# GridWorld as in 'gridworld.ipynb'
|
||||
gahw = GoalsAndHolesWorld((4,13), (4,), (3,0), [[3,3],[1,11]], (2,6), Agent, max_steps=200)
|
||||
gahw.add_map((4,13), (4,), (3,0), [[3,3],[1,11]], (2,6))
|
||||
ew = EvolveWeights(gahw, popsize=100, maxgenerations=100, tournsize=75, tournk=3, normalize_fitness=False)
|
||||
ew.driver()
|
||||
|
||||
def holes_block_direct_route_two_goals_right():
|
||||
# GridWorld as in 'gridworld.ipynb'
|
||||
gahw = GoalsAndHolesWorld((4,13), (4,), (3,12), [[3,3],[1,11]], (2,6), Agent, max_steps=200)
|
||||
gahw.add_map((4,13), (4,), (3,12), [[3,3],[1,11]], (2,6))
|
||||
ew = EvolveWeights(gahw, popsize=100, maxgenerations=100, tournsize=75, tournk=3, normalize_fitness=False)
|
||||
ew.driver()
|
||||
|
||||
|
||||
|
||||
def maxave():
|
||||
ma = MaxAve()
|
||||
ew = EvolveWeights(ma, popsize = 100, maxgenerations=500)
|
||||
ew.driver()
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
#holes_block_direct_route()
|
||||
print("Two different goals")
|
||||
holes_block_direct_route_two_goals()
|
||||
print("Two environments, both have goal on left.")
|
||||
holes_block_direct_route_two_goals_left()
|
||||
print("Two environments, both have goal on right.")
|
||||
holes_block_direct_route_two_goals_right()
|
||||
|
||||
# maxave()
|
||||
|
||||
pass
|
||||
|
|
@ -0,0 +1,328 @@
|
|||
"""
|
||||
pe.py
|
||||
|
||||
puzzles evolving
|
||||
|
||||
"""
|
||||
|
||||
|
||||
import sys
|
||||
# allow importing from the 'code/' dir
|
||||
sys.path.append("../code")
|
||||
|
||||
import os
|
||||
import platform
|
||||
import pickle
|
||||
import json
|
||||
import traceback
|
||||
import datetime
|
||||
import copy

import numpy as np, itertools
|
||||
import matplotlib.pyplot as plt
|
||||
from collections import defaultdict
|
||||
import importlib # module reloading
|
||||
|
||||
import environments
|
||||
import agents
|
||||
|
||||
# always forces a reload in case you have edited environments or agents
|
||||
importlib.reload(environments)
|
||||
importlib.reload(agents)
|
||||
#from environments.gridworld import GridWorld
|
||||
import environments.puzzle as pz
|
||||
from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace
|
||||
|
||||
from agents.q_agent import EvolvableAgent as Agent
|
||||
|
||||
# DEAP imports
|
||||
|
||||
import random
|
||||
from deap import creator, base, tools, algorithms
|
||||
|
||||
import multiprocessing
|
||||
|
||||
#pool = multiprocessing.Pool()
|
||||
#toolbox.register("map", pool.map)
|
||||
|
||||
# Weight handling
|
||||
from mda import MultiDimArray
|
||||
|
||||
def isotime():
|
||||
return datetime.datetime.now().isoformat()
|
||||
|
||||
def t2fn(timestamp):
|
||||
timestamp = timestamp.replace('.','_')
|
||||
timestamp = timestamp.replace(':','_')
|
||||
return timestamp
|
||||
|
||||
class Holder(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
|
||||
class EvolveWeights(object):
|
||||
"""
|
||||
Class to apply DEAP to evolve a population consisting of a set
|
||||
of weights.
|
||||
"""
|
||||
|
||||
def __init__(self, environ,
|
||||
popsize=100, maxgenerations=10000,
|
||||
cxpb=0.5, mtpb=0.05,
|
||||
wmin=-20.0, wmax=20.0,
|
||||
mut_center=0.0, mut_sigma=0.1, mut_indpb=0.05,
|
||||
tournsize=5,
|
||||
tournk=2,
|
||||
normalize_fitness=True,
|
||||
tag='environ'
|
||||
):
|
||||
self.tag = tag
|
||||
self.starttime = isotime()
|
||||
self.logbase = tag + "_" + t2fn(self.starttime)
|
||||
|
||||
self.environ = environ
|
||||
self.weights_len = environ.get_weights_len()
|
||||
|
||||
self.popsize = popsize
|
||||
self.maxgenerations = maxgenerations
|
||||
self.cxpb = cxpb
|
||||
self.mtpb = mtpb
|
||||
self.wmin = wmin
|
||||
self.wmax = wmax
|
||||
self.mut_center = mut_center
|
||||
self.mut_sigma = mut_sigma
|
||||
self.mut_indpb = mut_indpb
|
||||
self.tournsize = tournsize
|
||||
self.tournk = tournk
|
||||
self.normalize_fitness = normalize_fitness
|
||||
pass
|
||||
|
||||
def masv(self, pop):
|
||||
mav = []
|
||||
maxs = []
|
||||
for ind in pop:
|
||||
wts = [x for x in ind]
|
||||
mav.append(np.mean(np.abs(wts)))
|
||||
maxs.append(np.max(np.abs(wts)))
|
||||
allmax = np.max(maxs)
|
||||
mymasv = [x/allmax for x in mav]
|
||||
return mymasv
|
||||
|
||||
def cxTwoPointCopy(self, ind1, ind2):
|
||||
"""Execute a two points crossover with copy on the input individuals. The
|
||||
copy is required because the slicing in numpy returns a view of the data,
|
||||
which leads to a self overwriting in the swap operation. It prevents
|
||||
::
|
||||
>>> import numpy as np
|
||||
>>> a = np.array((1,2,3,4))
|
||||
>>> b = np.array((5,6,7,8))
|
||||
>>> a[1:3], b[1:3] = b[1:3], a[1:3]
|
||||
>>> print(a)
|
||||
[1 6 7 4]
|
||||
>>> print(b)
|
||||
[5 6 7 8]
|
||||
"""
|
||||
size = len(ind1)
|
||||
cxpoint1 = random.randint(1, size)
|
||||
cxpoint2 = random.randint(1, size - 1)
|
||||
if cxpoint2 >= cxpoint1:
|
||||
cxpoint2 += 1
|
||||
else: # Swap the two cx points
|
||||
cxpoint1, cxpoint2 = cxpoint2, cxpoint1
|
||||
ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = ind2[cxpoint1:cxpoint2].copy(), ind1[cxpoint1:cxpoint2].copy()
|
||||
return ind1, ind2
|
||||
|
||||
def zero(self):
|
||||
return 0.0
|
||||
|
||||
def smallrandom(self, eps=None):
|
||||
"""
|
||||
Produce a small random number in [-eps .. eps].
|
||||
|
||||
A random variate in [-1 .. 1] is produced then
|
||||
multiplied by eps, so the final range is in [-eps .. eps].
|
||||
|
||||
"""
|
||||
if eps is None:
|
||||
eps = self.environ.alpha
|
||||
rv = ((2.0 * random.random()) - 1.0) * eps
|
||||
return rv
|
||||
|
||||
def setup(self):
|
||||
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
|
||||
creator.create("Individual", np.ndarray, fitness=creator.FitnessMax)
|
||||
|
||||
self.toolbox = base.Toolbox()
|
||||
|
||||
self.pool = multiprocessing.Pool()
|
||||
self.toolbox.register("map", self.pool.map)
|
||||
|
||||
#toolbox.register("attr_bool", random.randint, 0, 1) # non-numpy non-float version
|
||||
# self.toolbox.register("attr_float", random.random)
|
||||
#self.toolbox.register("attr_float", self.zero)
|
||||
self.toolbox.register("attr_float", self.smallrandom)
|
||||
|
||||
self.toolbox.register("individual", tools.initRepeat, creator.Individual, self.toolbox.attr_float, n=self.weights_len)
|
||||
self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
|
||||
|
||||
# self.toolbox.register("evaluate", self.environ.evaluate)
|
||||
self.toolbox.register("evaluate", self.environ.evaluate)
|
||||
#toolbox.register("mate", tools.cxTwoPoint) # non-numpy non-float version
|
||||
self.toolbox.register("mate", self.cxTwoPointCopy)
|
||||
#toolbox.register("mutate", tools.mutFlipBit, indpb=0.05) # non-numpy non-float version
|
||||
self.toolbox.register("mutate", tools.mutGaussian, mu=self.mut_center, sigma=self.mut_sigma, indpb=self.mut_indpb)
|
||||
self.toolbox.register("select", tools.selTournament, tournsize=self.tournsize, k=self.tournk)
|
||||
|
||||
def normalize_fitnesses(self, fitnesses):
|
||||
#print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
|
||||
maxfitness = np.max([x[0] for x in fitnesses])
|
||||
#print("maxfitness", maxfitness)
|
||||
listfit = [x[0] for x in fitnesses]
|
||||
#print("listfit", listfit)
|
||||
normfit = [x/maxfitness for x in listfit]
|
||||
#print("normfit", normfit)
|
||||
fitnesses = [tuple([x]) for x in normfit]
|
||||
#print("normed fitnesses", ["%3.2f" % x[0] for x in fitnesses])
|
||||
return fitnesses
|
||||
|
||||
def log_it(self, generation):
|
||||
pool = self.pool
|
||||
toolbox = self.toolbox
|
||||
self.pool = None
|
||||
self.toolbox = None
|
||||
pklfn = f"{self.logbase}__{generation+1}-{self.maxgenerations}.pkl"
|
||||
with open(pklfn, "wb") as pf:
pickle.dump(self, pf)
|
||||
self.pool = pool
|
||||
self.toolbox = toolbox
|
||||
|
||||
def loop(self):
|
||||
self.population = self.toolbox.population(n=self.popsize)
|
||||
#print(self.masv(self.population))
|
||||
NGEN=self.maxgenerations
|
||||
for gen in range(NGEN):
|
||||
print("generation", gen)
|
||||
offspring = algorithms.varAnd(self.population, self.toolbox, cxpb=self.cxpb, mutpb=self.mtpb)
|
||||
# print("offspring", offspring)
|
||||
# constrain genome values to [wmin, wmax]; assign back in place,
# since np.clip alone returns a copy that would be discarded
for individual in offspring:
individual[:] = np.clip(individual, self.wmin, self.wmax)
|
||||
# print("clipped offspring", offspring)
|
||||
# Evaluate the individuals with an invalid fitness (not yet evaluated)
|
||||
# print("check fitness.valid")
|
||||
invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
|
||||
# print("invalid_ind", len(invalid_ind))
|
||||
#print("setting fitness")
|
||||
fitnesses = self.toolbox.map(self.toolbox.evaluate, invalid_ind)
|
||||
if self.normalize_fitness:
fitnesses = self.normalize_fitnesses(fitnesses)
|
||||
print("fitnesses", ["%3.2f" % x[0] for x in fitnesses])
|
||||
self.fitness_dist(fitnesses)
|
||||
# print("update ind fitness")
|
||||
for ind, fit in zip(invalid_ind, fitnesses):
|
||||
ind.fitness.values = fit
|
||||
#print("selection")
|
||||
#print("offspring\n", self.masv(offspring))
|
||||
self.offspring = offspring
|
||||
self.population = self.toolbox.select(offspring, k=len(self.population))
|
||||
if 0 == gen % 100:
|
||||
self.log_it(gen)
|
||||
|
||||
#print("population after selection\n", self.masv(self.population))
|
||||
#print("Report for generation", gen)
|
||||
self.report()
|
||||
|
||||
def report(self):
|
||||
# post-evolution analysis
|
||||
fitnesses = self.toolbox.map(self.toolbox.evaluate, self.population)
|
||||
if self.normalize_fitness:
|
||||
fitnesses = self.normalize_fitnesses(fitnesses)
|
||||
self.fitnesses = fitnesses
|
||||
self.sortedFitnesses = sorted(fitnesses)
|
||||
self.sortedFitnesses.reverse()
|
||||
self.fitness_dist(fitnesses)
|
||||
|
||||
self.bestFitness, self.worstFitness = self.sortedFitnesses[0], self.sortedFitnesses[-1]
|
||||
print("best/worst w", self.bestFitness, self.worstFitness)
|
||||
|
||||
self.bestGenome = tools.selBest(self.population, k=1)
|
||||
# print(self.bestGenome)
|
||||
|
||||
def ffmt(self, value, fmt="%3.2f"):
|
||||
return fmt % value
|
||||
|
||||
def fitness_dist(self, fitnesses):
listfit = [x[0] for x in fitnesses]
# np.percentile expects percentages in [0, 100], not fractions
pct05, pct25, pct50, pct75, pct95 = np.percentile(listfit, [5, 25, 50, 75, 95])
print(f"fitness dist: {self.ffmt(np.min(listfit))} {self.ffmt(pct05)} {self.ffmt(pct25)} {self.ffmt(pct50)} {self.ffmt(pct75)} {self.ffmt(pct95)} {self.ffmt(np.max(listfit))}")
|
||||
|
||||
def driver(self):
|
||||
# Initialize
|
||||
self.setup()
|
||||
# Generation loop
|
||||
self.loop()
|
||||
# Report
|
||||
self.report()
|
||||
self.log_it(self.maxgenerations)
|
||||
print(self.masv(self.population))
|
||||
pass
|
||||
|
||||
|
||||
def puzzles_exp_1():
|
||||
'''full test of the conveyorbelt world
|
||||
|
||||
>>> import copy
|
||||
>>> maxrewards = [1]
|
||||
>>> easy_features = [[0,1],[0,1],[3,1],[0,0]]
|
||||
>>> easy_rewards = [-1,-1,-1,1]
|
||||
>>> easy_tt = np.array([[0,0,2,3], [0,0,0,0], [2,0,2,3], [3,3,3,3]])
|
||||
>>> p1 = Puzzle(tt=easy_tt, features=easy_features, rewards=easy_rewards)
|
||||
>>> p2 = copy.deepcopy(p1)
|
||||
>>> puzzles = (p1,p2)
|
||||
>>> world = ConvBelt(actionSpace = getActionSpace(puzzles), observationSpace = getObservationSpace(puzzles), maxRewards = maxrewards, randomize = False)
|
||||
>>> world.append(p1)
|
||||
>>> world.append(p2)
|
||||
'''
|
||||
maxrewards = [1]
|
||||
easy_features = [[0,1],[0,1],[3,1],[0,0]]
|
||||
easy_rewards = [-1,-1,-1,1]
|
||||
easy_tt = np.array([[0,0,2,3], [0,0,0,0], [2,0,2,3], [3,3,3,3]])
|
||||
p1 = Puzzle(tt=easy_tt, features=easy_features, rewards=easy_rewards)
|
||||
p2 = copy.deepcopy(p1)
|
||||
puzzles = (p1, p2)
|
||||
world = ConvBelt(actionSpace = getActionSpace(puzzles),
|
||||
observationSpace = getObservationSpace(puzzles),
|
||||
maxRewards = maxrewards,
|
||||
agentclass=Agent,
|
||||
randomize = False, alpha=0.005)
|
||||
world.append(p1)
|
||||
world.append(p2)
|
||||
|
||||
|
||||
|
||||
ew = EvolveWeights(world, popsize=100, maxgenerations=1000, tournsize=75, tournk=3, normalize_fitness=False)
|
||||
ew.driver()
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("pe.py start...")
|
||||
|
||||
|
||||
puzzles_exp_1()
|
||||
|
||||
print("pe.py done.")
|
||||
|
|
@ -0,0 +1,254 @@
|
|||
"""RESS.py
|
||||
|
||||
Random Equal Stimulus Sets
|
||||
|
||||
Originally coded in Object Pascal for Delphi by Wesley R. Elsberry
|
||||
around 1999.
|
||||
|
||||
Translation to Python 3 by ChatGPT (GPT-4) 2023-06-01.
|
||||
|
||||
Random Equal Stimulus Sets are sequences of numbers indicating one of
a set of stimuli to be presented to a subject in a cognitive or
psychophysics task. The basic rules for generating these sequences are
derived from Gellermann (1933), but modified to permit the
specification of more than two stimuli in the set. The restriction to
a maximum of three sequential presentations of the same stimulus is
retained.
|
||||
|
||||
Issues:
|
||||
The 'next_yield' method does not work.
|
||||
Using 'next' for a sequence longer than the defined length of
|
||||
sequence can cause there to be sequences that violate Gellermann's
|
||||
assumptions, as the sequences composed together are not tested
|
||||
across the joins.
|
||||
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import traceback
|
||||
|
||||
import random
|
||||
|
||||
MAXRESS = 120 # Arbitrary maximum
|
||||
|
||||
class RESS:
|
||||
"""
|
||||
RESS class represents the equivalent of the Pascal unit 'ress' in Python.
|
||||
|
||||
Random Equal Stimulus Sets are sequences of numbers indicating one of
a set of stimuli to be presented to a subject in a cognitive or
psychophysics task. The basic rules for generating these sequences are
derived from Gellermann (1933), but modified to permit the
specification of more than two stimuli in the set. The restriction to
a maximum of three sequential presentations of the same stimulus is
retained.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.classes = None
|
||||
self.thelength = None
|
||||
self.series = [0] * MAXRESS
|
||||
self.lastseries = [0] * MAXRESS
|
||||
self.cnt = None
|
||||
self.seriesstr = ""
|
||||
self.current = None
|
||||
self.dummy = None
|
||||
self.hist = [0] * 61
|
||||
|
||||
def init(self):
|
||||
"""
|
||||
Initializes the variables in TRESS.
|
||||
"""
|
||||
self.classes = 1
|
||||
self.thelength = 0
|
||||
self.series = [0] * MAXRESS
|
||||
self.lastseries = [0] * MAXRESS
|
||||
self.hist = [0] * 61
|
||||
self.cnt = 0
|
||||
self.seriesstr = ""
|
||||
self.dummy = 0
|
||||
|
||||
def makestring(self):
|
||||
"""
|
||||
Creates a string representation of the series.
|
||||
Returns:
|
||||
The string representation of the series.
|
||||
"""
|
||||
tstr = ""
|
||||
for val in self.series[1:self.thelength + 1]:
|
||||
tstr += str(val)
|
||||
self.seriesstr = tstr
|
||||
return tstr
|
||||
|
||||
def generate(self, nlen, nclass):
"""
Generates a candidate series.
Args:
nlen: The length of the series.
nclass: The number of classes.
"""
self.cnt = 0
self.classes = nclass

# Constraint: sequence length no greater than the maximum
if nlen <= MAXRESS:
self.thelength = nlen
else:
self.thelength = MAXRESS
|
||||
|
||||
# Constraint: Multiple of number of classes
|
||||
if self.thelength % self.classes != 0:
|
||||
self.thelength -= self.thelength % self.classes
|
||||
|
||||
for i in range(self.classes):
|
||||
self.hist[i] = self.thelength // self.classes
|
||||
|
||||
self.series[0] = random.randint(0, self.classes - 1)
|
||||
self.hist[self.series[0]] -= 1
|
||||
|
||||
run = 1
for i in range(1, self.thelength):
ctr = 0
# draw candidates until one with remaining quota is found (bounded);
# run-length violations are caught afterwards by test()
while True:
ctr += 1
jj = random.randint(0, self.classes - 1)
if self.hist[jj] > 0:
break
if ctr > 100:
break
|
||||
if self.series[i - 1] == jj:
|
||||
run += 1
|
||||
else:
|
||||
run = 1
|
||||
self.hist[jj] -= 1
|
||||
self.series[i] = jj
|
||||
|
||||
def test(self):
|
||||
"""
|
||||
Tests candidates for criteria.
|
||||
Returns:
|
||||
True if the series is valid, False otherwise.
|
||||
"""
|
||||
ok = True
|
||||
hist = [0] * 61
|
||||
|
||||
for val in self.series[:self.thelength]:
|
||||
hist[val] += 1
|
||||
|
||||
for i in range(self.classes - 1):
|
||||
if hist[i] != hist[i + 1]:
|
||||
ok = False
|
||||
|
||||
if ok:
|
||||
run = 1
|
||||
for i in range(1, self.thelength):
|
||||
if self.series[i - 1] == self.series[i]:
|
||||
run += 1
|
||||
if run > 3:
|
||||
ok = False
|
||||
else:
|
||||
run = 1
|
||||
|
||||
return ok
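# generate() and test() form a simple generate-and-test loop: newress()
# below keeps regenerating candidates until one passes both the
# equal-count and the max-run-of-three checks.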
|
||||
|
||||
def newress(self, nlen=24, nclass=2):
|
||||
"""
|
||||
Finds and saves a valid series using generate and test.
|
||||
Args:
|
||||
nlen: The length of the series.
|
||||
nclass: The number of classes.
|
||||
"""
|
||||
print('nlen', nlen, 'nclass', nclass)
|
||||
try:
|
||||
|
||||
random.seed()
|
||||
|
||||
self.lastseries = self.series
|
||||
|
||||
while True:
|
||||
self.generate(nlen, nclass)
|
||||
# print("gen", self.makestring())
|
||||
if self.test():
|
||||
break
|
||||
return self.makestring()
|
||||
except Exception:
|
||||
estr = f"Error: {traceback.format_exc()}"
|
||||
print(estr)
|
||||
return ''
|
||||
|
||||
def next(self):
|
||||
"""
|
||||
Returns the next value within a series.
|
||||
Returns:
|
||||
The next value in the series.
|
||||
"""
|
||||
if self.cnt >= self.thelength:
self.newress(self.thelength, self.classes)

# read the current value, then advance (series is 0-indexed)
self.current = self.series[self.cnt]
self.cnt += 1
return self.current
|
||||
|
||||
def next_yield(self):
|
||||
"""
|
||||
Yields the next value within a series.
|
||||
"""
|
||||
print('start', self.series, self.cnt, self.series[self.cnt])
|
||||
|
||||
while True:
|
||||
if self.cnt >= self.thelength:
|
||||
print("calling newress")
|
||||
self.newress(self.thelength, self.classes)
|
||||
self.cnt = 0
|
||||
|
||||
print(self.cnt)
|
||||
print(self.series, self.cnt, self.series[self.cnt])
|
||||
self.current = self.series[self.cnt]
|
||||
yield str(self.current)
|
||||
self.cnt += 1
|
||||
|
||||
# Exercise the TRESS code
|
||||
|
||||
from random import seed
|
||||
|
||||
def main():
|
||||
# Set the seed for random number generation
|
||||
seed()
|
||||
|
||||
# Create an instance of the TRESS class
|
||||
ress1 = RESS()
|
||||
|
||||
# Initialize the TRESS object
|
||||
ress1.init()
|
||||
|
||||
# Generate and print several valid series
for _ in range(5):
ress1.newress(24, 3)
series = ress1.makestring()
print("Generated Series:", series)
|
||||
|
||||
# Generate and print the next value in the series
|
||||
for ii in range(26):
|
||||
next_val = ress1.next()
|
||||
print(ii, "Next Value:", str(next_val))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -0,0 +1 @@
|
|||
micromamba deactivate
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
UMAMBA_PATH="umamba_env"
|
||||
if [ ! -d "$UMAMBA_PATH" ]; then
|
||||
echo "no $UMAMBA_PATH found"
|
||||
. ./update_env.sh
|
||||
fi
|
||||
. ./activate_env.sh
|
||||
micromamba activate curio
|
||||
jupyter-lab
|
||||
|
|
@ -0,0 +1,138 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bf316089-5339-4ac8-b0e2-3618fe06a593",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np, itertools, copy\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from collections import defaultdict\n",
|
||||
"import importlib # module reloading\n",
|
||||
"\n",
|
||||
"# allow importing from the 'code/' dir\n",
|
||||
"import sys\n",
|
||||
"sys.path.append(\"../code\")\n",
|
||||
"\n",
|
||||
"import environments\n",
|
||||
"import agents\n",
|
||||
"# always forces a reload in case you have edited environments or agents\n",
|
||||
"importlib.reload(environments)\n",
|
||||
"importlib.reload(agents)\n",
|
||||
"from environments.gridworld import GridWorld\n",
|
||||
"from agents.q_agent import Agent\n",
|
||||
"\n",
|
||||
"# problem domain dependent settings\n",
|
||||
"dims = [4,12]\n",
|
||||
"obsSpace, actSpace = (dims[0], dims[1]), (4,)\n",
|
||||
"num_trials=1000\n",
|
||||
"n_actions = 4\n",
|
||||
"#(optimal lmbda in the agent is domain dependent - could be evolved)\n",
|
||||
"HARD_TIME_LIMIT = 50\n",
|
||||
"KILLED_REWARD = -10\n",
|
||||
"#(standard reward) = -1.0 (means agent is potentially wasting time - set internal to agent code)\n",
|
||||
"#(goal reward) = 1.0 (means the agent achieved something good - set internal to agent code)\n",
|
||||
"\n",
|
||||
"# create our own GridWorld that adheres to openAI-gym environment API during training\n",
|
||||
"env = GridWorld(dims = dims, startState = [3,0])\n",
|
||||
"\n",
|
||||
"# 4rows x 12columns (0,0) is top-left\n",
|
||||
"# -: empty location\n",
|
||||
"# S: Start location\n",
|
||||
"# G: Goal location\n",
|
||||
"# x: immediate fail (a hole / cliff)\n",
|
||||
"#\n",
|
||||
"# (map of grid world)\n",
|
||||
"# ------------\n",
|
||||
"# ------------\n",
|
||||
"# ------------\n",
|
||||
"# SxxxxxxxxxxG\n",
|
||||
"\n",
|
||||
"# add goals and holes\n",
|
||||
"# supports multiple goals, use 1 for now\n",
|
||||
"env.goals.append([3,11])\n",
|
||||
"# support multiple 'kill zones' (cliff edge, in openAI parlance)\n",
|
||||
"for i in range(1,11):\n",
|
||||
" env.holes.append([3,i])\n",
|
||||
" \n",
|
||||
"agent = Agent(obsSpace=obsSpace, actSpace=actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.42)\n",
|
||||
"# alpha # how much to weigh reward surprises that deviate from expectation\n",
|
||||
"# gamma # how important exepcted rewards will be\n",
|
||||
"# epsilon # fraction of exploration to exploitation (how often to choose a random action)\n",
|
||||
"# lmbda # how slowly memory of preceeding actions fades away (1=never, 0=\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"time_to_solve_each_trial = [] # lower is better\n",
|
||||
"for trialN in range(num_trials):\n",
|
||||
" # some output to see it running\n",
|
||||
" if (trialN % 10) == 0: print('.',end='')\n",
|
||||
" # initialize the agent, environment, and time for this trial\n",
|
||||
" agent.reset() # soft-reset() (keeps learned weights)\n",
|
||||
" nextState = env.reset()\n",
|
||||
" time = 0\n",
|
||||
" while True:\n",
|
||||
" time += 1\n",
|
||||
" # set agent senses based on environment and allow agent to determine an action\n",
|
||||
" agent.sensoryState = nextState\n",
|
||||
" agent.plasticUpdate()\n",
|
||||
" # determine effect on environment state & any reward (in standard openAI-gym API format)\n",
|
||||
" nextState, reward, goal_achieved, _ = env.step(agent.action)\n",
|
||||
" agent.reward = reward\n",
|
||||
" if goal_achieved or time == HARD_TIME_LIMIT: break\n",
|
||||
" # stop trial if agent explitly failed early\n",
|
||||
" elif reward <= KILLED_REWARD:\n",
|
||||
" agent.sensoryState = nextState\n",
|
||||
" agent.reward = reward\n",
|
||||
" agent.plasticUpdate() # allow 1 more update to 'learn' the bad reward\n",
|
||||
" agent.reset()\n",
|
||||
" nextState = env.reset()\n",
|
||||
" # record trial results\n",
|
||||
" time_to_solve_each_trial.append(time)\n",
|
||||
" \n",
|
||||
"print()\n",
|
||||
"plt.plot(time_to_solve_each_trial);\n",
|
||||
"pt=15 # font point\n",
|
||||
"plt.title('Time until agent solved trial', fontsize=pt)\n",
|
||||
"plt.xlabel('Trial', fontsize=pt)\n",
|
||||
"plt.ylabel('Time', fontsize=pt)\n",
|
||||
"\n",
|
||||
"# show path agent took in GridWorld using non-learning agent (staticUpdate())\n",
|
||||
"print(\"green dot: start location\")\n",
|
||||
"print(\"red dot: finish location\")\n",
|
||||
"env.render(agent)\n",
|
||||
"#render(agent,env)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d54a622f-42e4-4384-bf9a-0f0181301c3c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
|
@ -0,0 +1,162 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b067867a-c1bc-4769-a6ac-15e7277ab8e2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np, itertools, copy\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from collections import defaultdict\n",
|
||||
"import importlib # module reloading\n",
|
||||
"\n",
|
||||
"# allow importing from the 'code/' dir\n",
|
||||
"import sys\n",
|
||||
"sys.path.append(\"../code\")\n",
|
||||
"\n",
|
||||
"import environments\n",
|
||||
"import agents\n",
|
||||
"# always forces a reload in case you have edited environments or agents\n",
|
||||
"importlib.reload(environments)\n",
|
||||
"importlib.reload(agents)\n",
|
||||
"from environments.puzzle import Puzzle, ConvBelt, Action, getActionSpace, getObservationSpace\n",
|
||||
"from agents.q_agent import Agent\n",
|
||||
"\n",
|
||||
"import copy # allows duplicating puzzles into unique puzzles, otherwise python refs are shallow-copied\n",
|
||||
"maxrewards = [1] # could have multiple levels of 'goodness'\n",
|
||||
"\n",
|
||||
"# Create a puzzle with 4 states:\n",
|
||||
"# state 0: first presentation\n",
|
||||
"# state 1: getting passed over, advancing on belt (not really a state, more a placeholder)\n",
|
||||
"# state 2: investigated (more sensory information is available when examined closely)\n",
|
||||
"# state 3: consumed (saturating state with possible reward)\n",
|
||||
"easy_puzzle_tt = np.array([[0,0,2,3], # state 0: first presentation\n",
|
||||
" [0,0,0,0], # state 1: getting passed over (placeholder)\n",
|
||||
" [2,0,2,3], # state 2: investigated\n",
|
||||
" [3,3,3,3]]) # state 3: consumed\n",
|
||||
"# example puzzle with 2 sensorial dimensions\n",
|
||||
"easy_puzzle_features = [[0,1], # state 0: Empty/Unknown & Spikes\n",
|
||||
" [0,1], # state 1: Empty/Unknown & Spikes\n",
|
||||
" [3,1], # state 2: Red & Spikes\n",
|
||||
" [0,0]] # state 3: Empty/Unknown & Empty/Unknown\n",
|
||||
"easy_puzzle_rewards = [-1, # state 0: first look\n",
|
||||
" -1, # state 1: proceeding to next puzzle (placeholder)\n",
|
||||
" -1, # state 2: investigate\n",
|
||||
" 1] # state 3: consume (could be -10 poisonous! or -1 empty/useless)\n",
|
||||
"p1 = Puzzle(tt = easy_puzzle_tt,\n",
|
||||
" features = easy_puzzle_features,\n",
|
||||
" rewards = easy_puzzle_rewards)\n",
|
||||
"p2 = copy.deepcopy(p1)\n",
|
||||
"puzzles = (p1,p2)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"obsSpace = getObservationSpace(puzzles)\n",
|
||||
"actSpace = getActionSpace(puzzles)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"env = ConvBelt(actionSpace = getActionSpace(puzzles), # indicate number of actions agent can take\n",
|
||||
" observationSpace = getObservationSpace(puzzles), # indicate number of sensorial dimensions and sizes\n",
|
||||
" maxRewards = maxrewards, # rewards that constitute postive rewards\n",
|
||||
" randomize = False, # randomize puzzle positions on belt at each reset()\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"# can use append() or extend()\n",
|
||||
"env.append(p1)\n",
|
||||
"env.append(p2)\n",
|
||||
"\n",
|
||||
"# domain-specific settings\n",
|
||||
"num_trials=200\n",
|
||||
"n_actions = 4\n",
|
||||
"#(optimal lmbda in the agent is domain dependent - could be evolved)\n",
|
||||
"HARD_TIME_LIMIT = 600\n",
|
||||
"#KILLED_REWARD = -10 # not used here\n",
|
||||
"#(standard reward) = -1.0 (means agent is potentially wasting time - set internal to agent code)\n",
|
||||
"#(goal reward) = 1.0 (means the agent achieved something good - set internal to agent code)\n",
|
||||
"\n",
|
||||
"agent = Agent(obsSpace=obsSpace, actSpace=actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.42)\n",
|
||||
"# alpha # how much to weigh reward surprises that deviate from expectation\n",
|
||||
"# gamma # how important exepcted rewards will be\n",
|
||||
"# epsilon # fraction of exploration to exploitation (how often to choose a random action)\n",
|
||||
"# lmbda # how slowly memory of preceeding actions fades away (1=never, 0=\n",
|
||||
"\n",
|
||||
"time_to_solve_each_trial = []\n",
|
||||
"rewards = []\n",
|
||||
"\n",
|
||||
"for trialN in range(num_trials):\n",
|
||||
" # some output to see it running\n",
|
||||
" if (trialN % 10) == 0: print('.',end='')\n",
|
||||
" # initialize the agent, environment, and time for this trial\n",
|
||||
" agent.reset() # soft-reset() (keeps learned weights)\n",
|
||||
" nextState = env.reset()\n",
|
||||
" time = 0\n",
|
||||
" while True:\n",
|
||||
" time += 1\n",
|
||||
" # set agent senses based on environment and allow agent to determine an action\n",
|
||||
" agent.sensoryState = nextState\n",
|
||||
" agent.plasticUpdate()\n",
|
||||
" # determine effect on environment state & any reward (in standard openAI-gym API format)\n",
|
||||
" nextState, reward, goal_achieved, _ = env.step(agent.action)\n",
|
||||
" agent.reward = reward\n",
|
||||
" if env.puzzlesLeftToComplete == 0 or time == HARD_TIME_LIMIT:\n",
|
||||
" agent.plasticUpdate()\n",
|
||||
" break\n",
|
||||
" # could have deadly rewards that stop the trial early\n",
|
||||
" #elif reward <= -10:\n",
|
||||
" # agent.sensoryState = nextState\n",
|
||||
" # agent.reward = reward\n",
|
||||
" # agent.plasticUpdate()\n",
|
||||
" # agent.reset()\n",
|
||||
" # nextState = env.reset()\n",
|
||||
" rewards.append(reward)\n",
|
||||
" time_to_solve_each_trial.append(time)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
"print()\n",
|
||||
"print(list(agent.weights.round(3)))\n",
|
||||
"#print(agent.timeSinceBigSurprise)\n",
|
||||
"plt.figure(figsize=(16,4),dpi=200)\n",
|
||||
"plt.plot(time_to_solve_each_trial)\n",
|
||||
"pt=15 # font point\n",
|
||||
"plt.title('Time until agent solved trial (puzzle boxes)', fontsize=pt)\n",
|
||||
"plt.xlabel('Trial', fontsize=pt)\n",
|
||||
"plt.ylabel('Time', fontsize=pt)\n",
|
||||
"#figure()\n",
|
||||
"#plot(rewards)\n",
|
||||
"env.render(agent);"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0e22a5e6-47fb-45c0-905f-3fb5b6cc3980",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
% uses machine learning to facilitate automatic olfactory classification.
|
||||
% Intro discusses how smells are smelled.
|
||||
% PDF: https://arxiv.org/pdf/1906.07067
|
||||
@article{imam2020rapid,
|
||||
title={Rapid online learning and robust recall in a neuromorphic olfactory circuit},
|
||||
author={Imam, Nabil and Cleland, Thomas A},
|
||||
journal={Nature Machine Intelligence},
|
||||
volume={2},
|
||||
number={3},
|
||||
pages={181--191},
|
||||
year={2020},
|
||||
publisher={Nature Publishing Group}
|
||||
}
|
||||
|
||||
% PDF: https://search.proquest.com/docview/1297102848?pq-origsite=gscholar&imgSeq=1
|
||||
@article{gellermann1933chance,
|
||||
title={Chance orders of alternating stimuli in visual discrimination experiments},
|
||||
author={Gellermann, Louis W},
|
||||
journal={The journal of genetic psychology},
|
||||
volume={42},
|
||||
pages={206--208},
|
||||
year={1933},
|
||||
publisher={Journal Press, etc.}
|
||||
}
|
||||
|
||||
% PDF: https://static1.squarespace.com/static/5b82081250a54f02ee0758c8/t/5b8ed5a04fa51a484aa907ee/1536087459872/tinbergen+original.pdf
|
||||
% Also uploaded to repository.
|
||||
@article{Tinbergen1963Jan,
|
||||
author = {Tinbergen, N.},
|
||||
title = {{On aims and methods of Ethology}},
|
||||
journal = {Z. Tierpsychol.},
|
||||
volume = {20},
|
||||
number = {4},
|
||||
pages = {410--433},
|
||||
year = {1963},
|
||||
month = {Jan},
|
||||
issn = {0044-3573},
|
||||
publisher = {John Wiley {\&} Sons, Ltd},
|
||||
doi = {10.1111/j.1439-0310.1963.tb01161.x}
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,7 @@
|
|||
python=3.11
|
||||
jupyter
|
||||
numpy
|
||||
matplotlib
|
||||
plotnine
|
||||
nodejs
|
||||
deap
|
||||
|
|
@ -0,0 +1 @@
|
|||
jupyterlab
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
|
||||
OS="linux"
|
||||
if [[ "$OSTYPE" == "darwin"* ]]; then
|
||||
OS="osx"
|
||||
fi
|
||||
|
||||
ARCH="64"
|
||||
if [[ "$(uname -m)" == "aarch64" ]]; then
|
||||
if [[ "$OS" == "osx" ]]; then
|
||||
ARCH="arm64"
|
||||
else
|
||||
ARCH="aarch64"
|
||||
fi
|
||||
fi
|
||||
|
||||
SYSTEM="$OS-$ARCH"
|
||||
|
||||
# conda deactivate in case they have a conda env
|
||||
# micromamba deactivate in case they have a micromamba env
|
||||
conda deactivate &>/dev/null
|
||||
micromamba deactivate &>/dev/null
|
||||
|
||||
UMAMBA_PATH="umamba_env"
|
||||
if [ ! -d "umamba_env" ]; then
|
||||
# download micromamba
|
||||
echo "downloading micromamba to $UMAMBA_PATH/ ..."
|
||||
curl -Ls https://micro.mamba.pm/api/micromamba/${SYSTEM}/latest | tar -xvj bin/micromamba
|
||||
mv bin $UMAMBA_PATH
|
||||
# activate micromamba
|
||||
export MAMBA_ROOT_PREFIX=$PWD/$UMAMBA_PATH
|
||||
eval "$(./umamba_env/micromamba shell hook -s posix)"
|
||||
# create the project environment
|
||||
echo "creating 'curio' environment"
|
||||
micromamba create -n curio -c conda-forge
|
||||
micromamba activate curio
|
||||
else
echo "found micromamba at $UMAMBA_PATH"
# set up the shell hook before activating, otherwise 'activate' fails
export MAMBA_ROOT_PREFIX=$PWD/$UMAMBA_PATH
eval "$(./$UMAMBA_PATH/micromamba shell hook -s posix)"
micromamba activate curio
|
||||
fi
|
||||
echo "installing packages"
|
||||
# install conda requirements
|
||||
micromamba install --yes $(tr '\n' ' ' < requirements-conda.txt) -c conda-forge
|
||||
# install pip requirements
|
||||
pip install --no-input -r requirements-pip.txt
|
||||
micromamba deactivate
|
||||