334 lines
11 KiB
Python
Executable File
334 lines
11 KiB
Python
Executable File
"""
|
|
exp1.py - an example of using 'experiment.py'
|
|
|
|
Tasks:
|
|
|
|
- Consider how to have a changing schedule of stimulus presentation
|
|
|
|
Need to have something where we can see evolution producing a trait that
|
|
would indicate interest in new things in the environment. Sets up conditions
|
|
where curiosity could be advantageous.
|
|
|
|
Conveyor belt needs to have the ability to introduce new things.
|
|
|
|
Single factor shift to start -- color of the thing ?
|
|
The introduction of novelty is the main thing, where the novelty is
|
|
associated with fitness advantage.
|
|
|
|
Simple systems to test
|
|
- constant environment
|
|
- switch between two different environments
|
|
- frequency of shift makes a difference
|
|
- Goldilocks zone for intermediate frequency
|
|
Controlled randomization
|
|
- Known low-payoff 'food' in environment
|
|
- Better thing has a cue
|
|
- Changing frequency of presentation
|
|
- Constant
|
|
- Ramp
|
|
- Cycle
|
|
- 'Green' could indicate better but
|
|
- x factor for better could be changed
|
|
|
|
For all of these, we can test unseen (novel) stimuli
|
|
- Generalization can be tested
|
|
- Cue of goodness
|
|
- Proportion of time novel stimuli are rewarding
|
|
- Must be a proportion to introduce unpredictability
|
|
|
|
One hypothesis: unpredictability between cues and rewards may lead to curiosity
|
|
- Evolutionary timescale of unpredictability
|
|
- Predictable lifetime
|
|
|
|
Push current code to repository.
|
|
|
|
|
|
"""
|
|
|
|
|
|
import sys
|
|
# allow importing from the 'code/' dir
|
|
sys.path.append("../code")
|
|
|
|
import os
|
|
import platform
|
|
import pickle
|
|
import json
|
|
import traceback
|
|
import datetime
|
|
import copy
|
|
|
|
import numpy as np # , itertools, copy
|
|
import matplotlib.pyplot as plt
|
|
from collections import defaultdict
|
|
import importlib # module reloading
|
|
|
|
import environments
|
|
import agents
|
|
|
|
# always forces a reload in case you have edited environments or agents
|
|
importlib.reload(environments)
|
|
importlib.reload(agents)
|
|
#from environments.gridworld import GridWorld
|
|
import environments.puzzle as pz
|
|
from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace
|
|
|
|
from agents.q_agent import EvolvableAgent as Agent
|
|
|
|
# DEAP imports
|
|
|
|
import random
|
|
from deap import creator, base, tools, algorithms
|
|
|
|
import multiprocessing
|
|
|
|
#pool = multiprocessing.Pool()
|
|
#toolbox.register("map", pool.map)
|
|
|
|
# Weight handling
|
|
from mda import MultiDimArray
|
|
|
|
# RESS
|
|
from ress import RESS
|
|
|
|
# EvolveWeights
|
|
# from ew import EvolveWeights
|
|
from curio_evolve_weights import EvolveWeights
|
|
|
|
# Experiment
|
|
from experiment import Experiment
|
|
|
|
def isotime():
    """Return the current date and time as an ISO-format string.

    The value comes from ``datetime.datetime.now()`` called without a
    timezone argument and is rendered with ``isoformat()``.
    """
    now = datetime.datetime.now()
    return now.isoformat()
|
|
|
|
def t2fn(timestamp):
    """Return *timestamp* with every '.' and ':' replaced by '_'.

    All other characters are left untouched.  Judging by the name, the
    result is presumably meant to be embedded in a file name.
    """
    for unwanted in ('.', ':'):
        timestamp = timestamp.replace(unwanted, '_')
    return timestamp
|
|
|
|
class Holder(object):
    """
    Open-ended attribute bag.

    An instance starts out empty and callers attach whatever attributes
    they need to it afterwards.  It is the opposite of a class that pins
    down its attributes with ``__slots__``: nothing about the contents is
    fixed in advance.

    The original author uses it for context objects and notes that, with
    care about what gets stored, the whole object can be serialized to
    disk and loaded again later.
    """

    def __init__(self):
        """Create an empty holder; attributes are added after construction."""
|
|
|
|
"""
|
|
Probability of reward at all
|
|
Probability of strength of reward
|
|
|
|
Variances:
|
|
- How many puzzle cues do we have?
|
|
- How often does a puzzle appear in training?
|
|
- How often does a puzzle appear across evolutionary time?
|
|
- How much reward does solving a puzzle deliver?
|
|
|
|
Two things , green | red
|
|
green good
|
|
red bad
|
|
|
|
Outcomes
|
|
- Too unlikely -> no behavior to examine
|
|
- Entirely predictable
|
|
- In between -> curiosity has advantage
|
|
|
|
First sample from uniform distribution to determine reward (0.5)
|
|
Second: strength of reward in conjunction with probability of reward (small freq but large reward, etc.)
|
|
|
|
Spot or range where it becomes advantageous to evolve a curiosity module...
|
|
|
|
|
|
Figuring out a representation that allows all the flexibility we discussed...
|
|
|
|
"puzzles": [
|
|
{
|
|
"puzzle_description": "Appetitive puzzle",
|
|
"tt": [[0,0,2], # state 0: first presentation
|
|
[0,0,0], # state 1: getting passed over (placeholder)
|
|
[2,2,2]], # state 2: consumed (saturating)
|
|
"features": [[2], # state 0: Green
|
|
[2], # state 1: Green (placeholder)
|
|
[0]], # state 2: Empty/Unknown (after being eaten)
|
|
"rewards": [
|
|
[-1, # state 0: first look
|
|
-1, # state 1: proceeding to next puzzle (placeholder)
|
|
1, # state 2: consume (reward)
|
|
0.5 # Proportion
|
|
],
|
|
[-1, # state 0: first look
|
|
-1, # state 1: proceeding to next puzzle (placeholder)
|
|
-1, # state 2: consume (punishment)
|
|
0.5 # Proportion
|
|
],
|
|
]
|
|
},
|
|
{
|
|
"puzzle_description": "Aversive puzzle",
|
|
"tt": [[0,0,2], # state 0: first presentation
|
|
[0,0,0], # state 1: getting passed over (placeholder)
|
|
[2,2,2]], # state 2: consumed (saturating)
|
|
"features": [[1], # state 0: Red
|
|
[1], # state 1: Red (placeholder)
|
|
[0]], # state 2: Empty/Unknown (after being eaten)
|
|
"rewards": [-1, # state 0: first look
|
|
-1, # state 1: proceeding to next puzzle (placeholder)
|
|
-2], # state 2: consume (punishment)
|
|
},
|
|
|
|
|
|
"""
|
|
|
|
def exp1_environment(*args, **kwargs):
    """Build the conveyor-belt world used by experiment 1.

    A RESS instance produces a series of puzzle indices (the original
    comments call this a Gellermann sequence).  One ``Puzzle`` is
    instantiated per entry of that series from the unambiguous puzzle
    spec, and the puzzles are appended in order to a new ``ConvBelt``.

    Args:
        *args: Accepted for call compatibility; not used.
        **kwargs: Optional settings.  Only ``num_puzzles_on_belt`` is
            read: the length of the puzzle series to request
            (defaults to 6 when absent).

    Returns:
        The populated ``ConvBelt`` instance.
    """

    unambiguous_puzzle_spec = {
        "puzzle_set_description": "Unambiguous puzzle set with 1 good, 1 bad puzzle",
        "puzzles": [
            {
                "puzzle_description": "Appetitive puzzle",
                "tt": [[0, 0, 2],   # state 0: first presentation
                       [0, 0, 0],   # state 1: getting passed over (placeholder)
                       [2, 2, 2]],  # state 2: consumed (saturating)
                "features": [[2],   # state 0: Green
                             [2],   # state 1: Green (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,     # state 0: first look
                            -1,     # state 1: proceeding to next puzzle (placeholder)
                            1],     # state 2: consume (reward)
            },
            {
                "puzzle_description": "Aversive puzzle",
                "tt": [[0, 0, 2],   # state 0: first presentation
                       [0, 0, 0],   # state 1: getting passed over (placeholder)
                       [2, 2, 2]],  # state 2: consumed (saturating)
                "features": [[1],   # state 0: Red
                             [1],   # state 1: Red (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,     # state 0: first look
                            -1,     # state 1: proceeding to next puzzle (placeholder)
                            -2],    # state 2: consume (punishment)
            },
        ]
    }

    # NOTE: ambiguous_puzzle_spec and exp_schedule are not consumed by the
    # code below yet; they record the planned representation for a
    # schedule of puzzle sets.
    ambiguous_puzzle_spec = {
        "puzzle_set_description": "Ambiguous puzzle set with 1 good, 1 bad puzzle.",
        "puzzles": [
            {
                "puzzle_description": "Appetitive puzzle",
                "tt": [[0, 0, 2],   # state 0: first presentation
                       [0, 0, 0],   # state 1: getting passed over (placeholder)
                       [2, 2, 2]],  # state 2: consumed (saturating)
                "features": [[1],   # state 0: Red
                             [1],   # state 1: Red (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,     # state 0: first look
                            -1,     # state 1: proceeding to next puzzle (placeholder)
                            1],     # state 2: consume (reward)
            },
            {
                "puzzle_description": "Aversive puzzle",
                "tt": [[0, 0, 2],   # state 0: first presentation
                       [0, 0, 0],   # state 1: getting passed over (placeholder)
                       [2, 2, 2]],  # state 2: consumed (saturating)
                "features": [[1],   # state 0: Red
                             [1],   # state 1: Red (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,     # state 0: first look
                            -1,     # state 1: proceeding to next puzzle (placeholder)
                            -2],    # state 2: consume (punishment)
            },
        ]
    }

    # Notion: Have an object to define a schedule of presentation of
    # environments, with the ability to stochastically present one of
    # a list of environments.
    exp_schedule = {
        "setlist": [
            {
                "desc": "Initial puzzle set",
                "specs": [unambiguous_puzzle_spec],
                "turns": 50,
                "num_stimuli": 6,
                "sequence_type": "fixed",
                "probs": [[1.0], [1.0]]
            },
            {
                "desc": "Stochastic puzzle sets",
                "specs": [unambiguous_puzzle_spec, ambiguous_puzzle_spec],
                "turns": 200,
                "num_stimuli": 6,
                "sequence_type": "stochastic",
                "probs": [[1.0, 0.0], [0.0, 1.0]]
            },
        ]
    }

    # Honor a caller-supplied belt length; fall back to 6 so the name is
    # always bound even when the keyword is omitted.
    num_puzzles_on_belt = kwargs.get('num_puzzles_on_belt', 6)

    # Named puzzle_spec (not pz) so the module alias
    # 'import environments.puzzle as pz' is not shadowed.
    puzzle_spec = unambiguous_puzzle_spec
    puzzle_defs = puzzle_spec['puzzles']

    maxrewards = [1]

    # Produce Gellermann sequence
    upress = RESS()
    # Diagnostic output, kept as in the original script.
    print(dir(upress))
    print(puzzle_defs)
    print(len(puzzle_defs))
    upseries = upress.newress(num_puzzles_on_belt, len(puzzle_defs))
    print("upseries", upseries)

    # Create puzzle sequence
    # Instantiate puzzles per Gellermann sequence
    puzzles = []
    for stimi in upseries:
        # Each series entry is converted to an int index into the spec list.
        stimn = int(stimi)
        myp = Puzzle(tt=np.array(puzzle_defs[stimn]['tt']),
                     features=puzzle_defs[stimn]['features'],
                     rewards=puzzle_defs[stimn]['rewards']
                     )
        puzzles.append(myp)

    # Create conveyor belt
    world = ConvBelt(actionSpace=getActionSpace(puzzles),
                     observationSpace=getObservationSpace(puzzles),
                     maxRewards=maxrewards,
                     agentclass=Agent,
                     randomize=False, alpha=0.005)

    # Add puzzles
    for puzzle in puzzles:
        world.append(puzzle)

    return world
|
|
|
|
def do_experiment():
    """Assemble an Experiment for exp1 and run its evolver if it validates.

    The experiment is given the agent, environment and evolver classes,
    default evolver attributes, and ``exp1_environment`` as the function
    that makes the environment.  After the environment and the evolver
    instance are made, ``validate()`` decides whether the evolver's
    ``driver()`` is invoked; otherwise a failure message is printed.
    """
    experiment = Experiment()

    experiment.set_agentclass(Agent)
    experiment.set_environclass(ConvBelt)
    experiment.set_evolverclass(EvolveWeights)
    experiment.set_evolver_attributes()  # called without arguments: defaults

    experiment.set_environ_maker(exp1_environment)  # registers the function
    experiment.make_environ()  # invokes the registered function
    experiment.make_evolver_instance()

    if not experiment.validate():
        print("Experiment failed to validate.")
        return

    experiment.evolver.driver()
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: announce the start, run the experiment, and
    # announce completion.
    print("exp1.py start...")
    do_experiment()
    print("exp1.py done.")
|