# alice/code/curio_exp1.py
#
# 356 lines, 12 KiB, Python, executable file

import sys
# allow importing from the 'code/' dir
sys.path.append("../code")
import os
import platform
import pickle
import json
import traceback
import datetime
import copy
import numpy as np # , itertools, copy
import matplotlib.pyplot as plt
from collections import defaultdict
import importlib # module reloading
import environments
import agents
# always forces a reload in case you have edited environments or agents
importlib.reload(environments)
importlib.reload(agents)
#from environments.gridworld import GridWorld
import environments.puzzle as pz
from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace
from agents.q_agent import EvolvableAgent as Agent
# DEAP imports
import random
from deap import creator, base, tools, algorithms
import multiprocessing
#pool = multiprocessing.Pool()
#toolbox.register("map", pool.map)
# Weight handling
from mda import MultiDimArray
# RESS
from ress import RESS
# EvolveWeights
# from ew import EvolveWeights
from curio_evolve_weights import EvolveWeights
# Experiment
from curio_experiment import Experiment
def isotime():
    """Return the current local date and time as an ISO 8601 string."""
    now = datetime.datetime.now()
    return now.isoformat()
def t2fn(timestamp):
    """Turn a timestamp string into a filename-friendly string.

    Every '.' and ':' in `timestamp` is replaced by '_'; all other
    characters are returned unchanged.
    """
    # One translation pass covers both characters.
    filename_safe = str.maketrans({'.': '_', ':': '_'})
    return timestamp.translate(filename_safe)
class Holder(object):
    """Bare object that defines no attributes of its own.

    NOTE(review): presumably meant as an ad-hoc namespace to hang
    attributes on; it is not used in the visible part of this file.
    """

    def __init__(self):
        """Create an empty instance; no state is initialised."""
        return None
# Feature (cue) codes used by the puzzle specs below.
_CUE_EMPTY = 0  # Empty/Unknown (after being eaten)
_CUE_RED = 1
_CUE_GREEN = 2


def _three_state_puzzle(description, cue, consume_reward):
    """Return a fresh spec dict for one three-state puzzle.

    All puzzles in this file share the same transition table and the same
    -1 reward in states 0 and 1; they differ only in the cue shown before
    consumption and in the reward received in state 2.

    Args:
        description: Text stored under "puzzle_description".
        cue: Feature code shown in states 0 and 1.
        consume_reward: Reward for state 2 (consumed).
    """
    return {
        "puzzle_description": description,
        "tt": [
            [0, 0, 2],  # state 0: first presentation
            [0, 0, 0],  # state 1: getting passed over (placeholder)
            [2, 2, 2],  # state 2: consumed (saturating)
        ],
        "features": [
            [cue],         # state 0
            [cue],         # state 1 (placeholder)
            [_CUE_EMPTY],  # state 2: Empty/Unknown (after being eaten)
        ],
        "rewards": [
            -1,              # state 0: first look
            -1,              # state 1: proceeding to next puzzle (placeholder)
            consume_reward,  # state 2: consume (reward or punishment)
        ],
    }


# Good puzzle is Green, bad puzzle is Red: the cue identifies the outcome.
unambiguous_puzzle_spec = {
    "puzzle_set_description": "Unambiguous puzzle set with 1 good, 1 bad puzzle",
    "puzzles": [
        _three_state_puzzle("Appetitive puzzle", _CUE_GREEN, 1),
        _three_state_puzzle("Aversive puzzle", _CUE_RED, -2),
    ],
}

# Both puzzles are Red: the cue does not distinguish good from bad.
ambiguous_puzzle_spec = {
    "puzzle_set_description": "Ambiguous puzzle set with 1 good, 1 bad puzzle.",
    "puzzles": [
        _three_state_puzzle("Appetitive puzzle", _CUE_RED, 1),
        _three_state_puzzle("Aversive puzzle", _CUE_RED, -2),
    ],
}

# Lookup from the spec names used in the schedule to the spec dicts.
specdict = {
    'unambiguous_puzzle_spec': unambiguous_puzzle_spec,
    'ambiguous_puzzle_spec': ambiguous_puzzle_spec,
}

# Presentation schedule: an ordered list of phases, each naming the puzzle
# set(s) to draw from.
exp_schedule = {
    "setlist": [
        {
            "desc": "Initial puzzle set",
            "specs": ['unambiguous_puzzle_spec'],
            # How many turns for 'lifetime learning'.
            # Needs to be passed to the agent.
            "turns": 50,
            # Open question: how many puzzles, or how many different
            # features? Might just be the number of 'features' in the puzzle
            # spec; the puzzle feature number need not be specified manually.
            "num_stimuli": 6,
            "sequence_type": "fixed",
            "probs": [[1.0], [1.0]],
        },
        {
            "desc": "Stochastic puzzle sets",
            "specs": ['unambiguous_puzzle_spec', 'ambiguous_puzzle_spec'],
            "turns": 200,
            "num_stimuli": 6,
            "sequence_type": "stochastic",
            "probs": [[1.0, 0.0], [0.0, 1.0]],
        },
    ]
}
def make_puzzle_list(*args, **kwargs):
    """Walk an experiment schedule to build a puzzle list (unfinished stub).

    The function currently validates its keyword arguments and reads the
    fields of every schedule entry, but the branches that would create
    puzzles are still empty: nothing is built and None is returned.

    Keyword Args:
        specdict: Required. Not read yet; presumably the mapping from spec
            name to puzzle-spec dict (TODO confirm once implemented).
        schedule: Required. Dict with a 'setlist' list whose entries each
            provide 'specs', 'num_stimuli', 'turns', 'sequence_type' and
            'probs'.

    Raises:
        AssertionError: If 'specdict' or 'schedule' was not passed.
        KeyError: If the schedule or one of its entries lacks a key read
            below.
    """
    # Sanity checks: name every missing parameter before failing.
    req_params = ['specdict', 'schedule']
    missing = [rpi for rpi in req_params if rpi not in kwargs]
    for rpi in missing:
        print("make_puzzle_list missing", rpi)
    if missing:
        # Raised explicitly rather than through an `assert` statement so the
        # check is not stripped under `python -O`. AssertionError and the
        # message are kept so callers see the same failure as before.
        raise AssertionError("Error: Missing a required parameter. Quitting.")

    specdict = kwargs['specdict']
    schedule = kwargs['schedule']
    puzzles = []
    upress = RESS()  # Random Equal Stimulus Sets instance
    for seti in schedule['setlist']:
        num_sets = len(seti['specs'])
        num_stimuli = seti['num_stimuli']
        num_turns = seti['turns']
        seqtype = seti['sequence_type']
        probs = seti['probs']
        if num_sets == 1:
            # Simple, just repeat the puzzle num_stimuli * times
            # TODO: not implemented yet.
            pass
        else:
            # TODO: multi-set (stochastic) case not implemented yet.
            pass
def exp1_environment(*args, **kwargs):
    """Build the conveyor-belt world for experiment 1.

    A sequence from RESS.newress (a Gellermann sequence, per the original
    comments) selects, for each belt position, one of the two puzzles in the
    unambiguous puzzle set. One Puzzle is instantiated per position and
    appended to a new ConvBelt.

    Keyword Args:
        num_puzzles_on_belt: Number of puzzles to place on the belt.
            Defaults to 6.

    Returns:
        The ConvBelt instance with all puzzles appended.
    """
    # Only the unambiguous set is used here. The ambiguous set and the
    # presentation schedule exist at module level; the unused local copies
    # that used to sit in this function have been dropped.
    #
    # Notion: Have an object to define a schedule of presentation of
    # environments, with the ability to stochastically present one of
    # a list of environments.
    puzzle_set = {
        "puzzle_set_description": "Unambiguous puzzle set with 1 good, 1 bad puzzle",
        "puzzles": [
            {
                "puzzle_description": "Appetitive puzzle",
                "tt": [[0, 0, 2],   # state 0: first presentation
                       [0, 0, 0],   # state 1: getting passed over (placeholder)
                       [2, 2, 2]],  # state 2: consumed (saturating)
                "features": [[2],   # state 0: Green
                             [2],   # state 1: Green (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,     # state 0: first look
                            -1,     # state 1: proceeding to next puzzle (placeholder)
                            1],     # state 2: consume (reward)
            },
            {
                "puzzle_description": "Aversive puzzle",
                "tt": [[0, 0, 2],   # state 0: first presentation
                       [0, 0, 0],   # state 1: getting passed over (placeholder)
                       [2, 2, 2]],  # state 2: consumed (saturating)
                "features": [[1],   # state 0: Red
                             [1],   # state 1: Red (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,     # state 0: first look
                            -1,     # state 1: proceeding to next puzzle (placeholder)
                            -2],    # state 2: consume (punishment)
            },
        ]
    }
    # Named `puzzle_specs`/`puzzle_set` rather than `pz` so the module alias
    # from `import environments.puzzle as pz` is not shadowed.
    puzzle_specs = puzzle_set['puzzles']

    num_puzzles_on_belt = kwargs.get('num_puzzles_on_belt', 6)
    maxrewards = [1]

    # Produce Gellermann sequence
    upress = RESS()
    print(dir(upress))
    print(puzzle_specs)
    print(len(puzzle_specs))
    upseries = upress.newress(num_puzzles_on_belt, len(puzzle_specs))
    print("upseries", upseries)

    # Create puzzle sequence
    # call to make_puzzle_list goes about here

    # Instantiate puzzles per Gellermann sequence
    puzzles = []
    for stimi in upseries:
        spec = puzzle_specs[int(stimi)]
        puzzles.append(Puzzle(tt=np.array(spec['tt']),
                              features=spec['features'],
                              rewards=spec['rewards']))

    # Create conveyor belt
    world = ConvBelt(actionSpace=getActionSpace(puzzles),
                     observationSpace=getObservationSpace(puzzles),
                     maxRewards=maxrewards,
                     agentclass=Agent,
                     randomize=False, alpha=0.005)
    # Add puzzles
    for pi in puzzles:
        world.append(pi)
    return world
def do_experiment():
    """Configure an Experiment for experiment 1 and run its evolver.

    Wires the agent, environment and evolver classes into a new Experiment,
    builds the environment through exp1_environment and creates the evolver
    instance. The evolver's driver is run only if the experiment validates;
    otherwise a failure message is printed. Progress is printed before each
    step.
    """
    print('creating myexp')
    experiment = Experiment()

    print('setting agentclass')
    experiment.set_agentclass(Agent)

    print('setting environclass')
    experiment.set_environclass(ConvBelt)

    print('setting evolverclass')
    experiment.set_evolverclass(EvolveWeights)

    print('setting evolver_attributes')
    experiment.set_evolver_attributes()  # defaults

    print('setting environ_maker')
    experiment.set_environ_maker(exp1_environment)  # registers the function

    print('making environment')
    experiment.make_environ()  # calls the registered function

    print('making evolver_instance')
    experiment.make_evolver_instance()

    # Guard clause: nothing to run when validation fails.
    if not experiment.validate():
        print("Experiment failed to validate.")
        return

    print('running driver')
    experiment.evolver.driver()
def _main():
    """Script entry point: run the experiment between start/done banners."""
    print("exp1.py start...")
    do_experiment()
    print("exp1.py done.")


if __name__ == "__main__":
    _main()