alice/code/exp1.py

334 lines
11 KiB
Python
Executable File

"""
exp1.py - example use of 'experiment.py'
Tasks:
- Consider how to have a changing schedule of stimulus presentation
Need to have something where we can see evolution producing a trait that
would indicate interest in new things in the environment. Sets up conditions
where curiosity could be advantageous.
Conveyor belt needs to have the ability to introduce new things.
Single factor shift to start -- color of the thing ?
The introduction of novelty is the main thing, where the novelty is
associated with fitness advantage.
Simple systems to test
- constant environment
- switch between two different environments
- frequency of shift makes a difference
- Goldilocks zone for intermediate frequency
Controlled randomization
- Known low-payoff 'food' in environment
- Better thing has a cue
- Changing frequency of presentation
- Constant
- Ramp
- Cycle
- 'Green' could indicate better but
- x factor for better could be changed
For all of these, we can test unseen (novel) stimuli
- Generalization can be tested
- Cue of goodness
- Proportion of time novel stimuli are rewarding
- Must be a proportion to introduce unpredictability
One hypothesis: unpredictability between cues and rewards may lead to curiosity
- Evolutionary timescale of unpredictability
- Predictable lifetime
Push current code to repository.
"""
import sys
# allow importing from the 'code/' dir
sys.path.append("../code")
import os
import platform
import pickle
import json
import traceback
import datetime
import copy
import numpy as np # , itertools, copy
import matplotlib.pyplot as plt
from collections import defaultdict
import importlib # module reloading
import environments
import agents
# always forces a reload in case you have edited environments or agents
importlib.reload(environments)
importlib.reload(agents)
#from environments.gridworld import GridWorld
import environments.puzzle as pz
from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace
from agents.q_agent import EvolvableAgent as Agent
# DEAP imports
import random
from deap import creator, base, tools, algorithms
import multiprocessing
#pool = multiprocessing.Pool()
#toolbox.register("map", pool.map)
# Weight handling
from mda import MultiDimArray
# RESS
from ress import RESS
# EvolveWeights
# from ew import EvolveWeights
from curio_evolve_weights import EvolveWeights
# Experiment
from experiment import Experiment
def isotime():
    """Return the current date and time as an ISO 8601 formatted string."""
    now = datetime.datetime.now()
    return now.isoformat()
def t2fn(timestamp):
    """
    Convert a timestamp string into a form usable in a filename.

    Every '.' and ':' in `timestamp` is replaced by '_'; all other
    characters are returned unchanged.
    """
    cleaned = timestamp
    for unwanted in ('.', ':'):
        cleaned = cleaned.replace(unwanted, '_')
    return cleaned
class Holder:
    """
    An open-ended container object: a "digital duffle bag".

    Instances start out empty and callers attach whatever attributes
    they need. This is the opposite of the __slots__ convention, since
    nothing about the contents is fixed in advance.

    Handy as a context object. With some care about what gets stored,
    the whole instance can be serialized to disk and loaded again later.
    """

    def __init__(self):
        """Create an empty holder; attributes are added by the caller."""
"""
Probability of reward at all
Probability of strength of reward
Variances:
- How many puzzle cues do we have?
- How often does a puzzle appear in training?
- How often does a puzzle appear across evolutionary time?
- How much reward does solving a puzzle deliver?
Two things , green | red
green good
red bad
Outcomes
- Too unlikely -> no behavior to examine
- Entirely predictable
- In between -> curiosity has advantage
First sample from uniform distribution to determine reward (0.5)
Second: strength of reward in conjunction with probability of reward (small frequency but large reward, etc.)
Spot or range where it becomes advantageous to evolve a curiosity module...
Figuring out a representation that allows all the flexibility we discussed...
"puzzles": [
{
"puzzle_description": "Appetitive puzzle",
"tt": [[0,0,2], # state 0: first presentation
[0,0,0], # state 1: getting passed over (placeholder)
[2,2,2]], # state 2: consumed (saturating)
"features": [[2], # state 0: Green
[2], # state 1: Green (placeholder)
[0]], # state 2: Empty/Unknown (after being eaten)
"rewards": [
[-1, # state 0: first look
-1, # state 1: proceeding to next puzzle (placeholder)
1, # state 2: consume (reward)
0.5 # Proportion
],
[-1, # state 0: first look
-1, # state 1: proceeding to next puzzle (placeholder)
-1, # state 2: consume (punishment)
0.5 # Proportion
],
]
},
{
"puzzle_description": "Aversive puzzle",
"tt": [[0,0,2], # state 0: first presentation
[0,0,0], # state 1: getting passed over (placeholder)
[2,2,2]], # state 2: consumed (saturating)
"features": [[1], # state 0: Red
[1], # state 1: Red (placeholder)
[0]], # state 2: Empty/Unknown (after being eaten)
"rewards": [-1, # state 0: first look
-1, # state 1: proceeding to next puzzle (placeholder)
-2], # state 2: consume (punishment)
},
"""
def exp1_environment(*args, **kwargs):
    """
    Build the conveyor-belt environment for experiment 1.

    Two puzzle-set specifications are defined here: an "unambiguous"
    set (green appetitive puzzle, red aversive puzzle) and an
    "ambiguous" set (both puzzles red). Only the unambiguous set is
    currently instantiated; the ambiguous set and `exp_schedule` record
    the intended presentation schedule but are not consumed yet.

    Positional arguments are accepted and ignored.

    Keyword arguments:
        num_puzzles_on_belt: how many puzzles to place on the belt.
            Defaults to 6 when not supplied.

    Returns:
        The ConvBelt instance with one Puzzle appended per entry of the
        sequence produced by RESS.newress().
    """
    unambiguous_puzzle_spec = {
        "puzzle_set_description": "Unambiguous puzzle set with 1 good, 1 bad puzzle",
        "puzzles": [
            {
                "puzzle_description": "Appetitive puzzle",
                "tt": [[0, 0, 2],   # state 0: first presentation
                       [0, 0, 0],   # state 1: getting passed over (placeholder)
                       [2, 2, 2]],  # state 2: consumed (saturating)
                "features": [[2],   # state 0: Green
                             [2],   # state 1: Green (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,     # state 0: first look
                            -1,     # state 1: proceeding to next puzzle (placeholder)
                            1],     # state 2: consume (reward)
            },
            {
                "puzzle_description": "Aversive puzzle",
                "tt": [[0, 0, 2],   # state 0: first presentation
                       [0, 0, 0],   # state 1: getting passed over (placeholder)
                       [2, 2, 2]],  # state 2: consumed (saturating)
                "features": [[1],   # state 0: Red
                             [1],   # state 1: Red (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,     # state 0: first look
                            -1,     # state 1: proceeding to next puzzle (placeholder)
                            -2],    # state 2: consume (punishment)
            },
        ]
    }
    ambiguous_puzzle_spec = {
        "puzzle_set_description": "Ambiguous puzzle set with 1 good, 1 bad puzzle.",
        "puzzles": [
            {
                "puzzle_description": "Appetitive puzzle",
                "tt": [[0, 0, 2],   # state 0: first presentation
                       [0, 0, 0],   # state 1: getting passed over (placeholder)
                       [2, 2, 2]],  # state 2: consumed (saturating)
                "features": [[1],   # state 0: Red
                             [1],   # state 1: Red (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,     # state 0: first look
                            -1,     # state 1: proceeding to next puzzle (placeholder)
                            1],     # state 2: consume (reward)
            },
            {
                "puzzle_description": "Aversive puzzle",
                "tt": [[0, 0, 2],   # state 0: first presentation
                       [0, 0, 0],   # state 1: getting passed over (placeholder)
                       [2, 2, 2]],  # state 2: consumed (saturating)
                "features": [[1],   # state 0: Red
                             [1],   # state 1: Red (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,     # state 0: first look
                            -1,     # state 1: proceeding to next puzzle (placeholder)
                            -2],    # state 2: consume (punishment)
            },
        ]
    }

    # Notion: Have an object to define a schedule of presentation of
    # environments, with the ability to stochastically present one of
    # a list of environments.
    # NOTE: the schedule is a design placeholder; nothing below reads it yet.
    exp_schedule = {
        "setlist": [
            {
                "desc": "Initial puzzle set",
                "specs": [unambiguous_puzzle_spec],
                "turns": 50,
                "num_stimuli": 6,
                "sequence_type": "fixed",
                "probs": [[1.0], [1.0]]
            },
            {
                "desc": "Stochastic puzzle sets",
                "specs": [unambiguous_puzzle_spec, ambiguous_puzzle_spec],
                "turns": 200,
                "num_stimuli": 6,
                "sequence_type": "stochastic",
                "probs": [[1.0, 0.0], [0.0, 1.0]]
            },
        ]
    }

    # Honor the caller's value and fall back to 6. Previously the name was
    # only bound (to a hard-coded 6) when the keyword was present, so calling
    # without it failed with an unbound local further down.
    num_puzzles_on_belt = kwargs.get('num_puzzles_on_belt', 6)

    # Named puzzle_spec rather than `pz` so the module-level alias
    # `environments.puzzle as pz` is not shadowed inside this function.
    puzzle_spec = unambiguous_puzzle_spec
    puzzle_defs = puzzle_spec['puzzles']

    # Passed straight through to ConvBelt as maxRewards.
    maxrewards = [1]

    # Produce Gellermann sequence
    upress = RESS()
    print(dir(upress))
    print(puzzle_defs)
    print(len(puzzle_defs))
    upseries = upress.newress(num_puzzles_on_belt, len(puzzle_defs))
    print("upseries", upseries)

    # Instantiate one puzzle per sequence entry; each entry is converted to
    # an int and used as an index into the puzzle definitions.
    puzzles = []
    for stimi in upseries:
        puzzle_def = puzzle_defs[int(stimi)]
        puzzles.append(Puzzle(tt=np.array(puzzle_def['tt']),
                              features=puzzle_def['features'],
                              rewards=puzzle_def['rewards']))

    # Create conveyor belt
    world = ConvBelt(actionSpace=getActionSpace(puzzles),
                     observationSpace=getObservationSpace(puzzles),
                     maxRewards=maxrewards,
                     agentclass=Agent,
                     randomize=False, alpha=0.005)

    # Add puzzles in sequence order
    for puzzle in puzzles:
        world.append(puzzle)
    return world
def do_experiment():
    """
    Configure an Experiment for exp1 and run its evolver.

    The experiment is given the agent, environment and evolver classes,
    default evolver attributes, and exp1_environment as the environment
    maker. The environment and evolver instance are then created. The
    evolver's driver() is only invoked when the experiment validates;
    otherwise a failure message is printed.
    """
    experiment = Experiment()

    # Plug in the component classes.
    experiment.set_agentclass(Agent)
    experiment.set_environclass(ConvBelt)
    experiment.set_evolverclass(EvolveWeights)

    # No arguments: keep the evolver defaults.
    experiment.set_evolver_attributes()

    # Register the environment factory, then build the environment with it.
    experiment.set_environ_maker(exp1_environment)
    experiment.make_environ()
    experiment.make_evolver_instance()

    if not experiment.validate():
        print("Experiment failed to validate.")
        return
    experiment.evolver.driver()
# Script entry point: the experiment runs only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    print("exp1.py start...")
    do_experiment()
    print("exp1.py done.")