356 lines
12 KiB
Python
Executable File
356 lines
12 KiB
Python
Executable File
import sys
|
|
# allow importing from the 'code/' dir
|
|
sys.path.append("../code")
|
|
|
|
import os
|
|
import platform
|
|
import pickle
|
|
import json
|
|
import traceback
|
|
import datetime
|
|
import copy
|
|
|
|
import numpy as np # , itertools, copy
|
|
import matplotlib.pyplot as plt
|
|
from collections import defaultdict
|
|
import importlib # module reloading
|
|
|
|
import environments
|
|
import agents
|
|
|
|
# always forces a reload in case you have edited environments or agents
|
|
importlib.reload(environments)
|
|
importlib.reload(agents)
|
|
#from environments.gridworld import GridWorld
|
|
import environments.puzzle as pz
|
|
from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace
|
|
|
|
from agents.q_agent import EvolvableAgent as Agent
|
|
|
|
# DEAP imports
|
|
|
|
import random
|
|
from deap import creator, base, tools, algorithms
|
|
|
|
import multiprocessing
|
|
|
|
#pool = multiprocessing.Pool()
|
|
#toolbox.register("map", pool.map)
|
|
|
|
# Weight handling
|
|
from mda import MultiDimArray
|
|
|
|
# RESS
|
|
from ress import RESS
|
|
|
|
# EvolveWeights
|
|
# from ew import EvolveWeights
|
|
from curio_evolve_weights import EvolveWeights
|
|
|
|
# Experiment
|
|
from curio_experiment import Experiment
|
|
|
|
def isotime():
    """Return the current local date and time as an ISO 8601 formatted string."""
    now = datetime.datetime.now()
    return now.isoformat()
|
|
|
|
def t2fn(timestamp):
    """Return *timestamp* with every '.' and ':' replaced by '_'.

    All other characters are passed through unchanged.
    """
    for unsafe in ('.', ':'):
        timestamp = timestamp.replace(unsafe, '_')
    return timestamp
|
|
|
|
class Holder:
    """Empty class whose constructor takes no arguments and sets no attributes.

    NOTE(review): presumably meant as a plain attribute container; no use of
    it is visible in this part of the file -- confirm against callers.
    """

    def __init__(self):
        """Create an instance without defining any attributes."""
|
|
|
|
# ---------------------------------------------------------------------------
# Module-level puzzle specifications and presentation schedule.
#
# Each puzzle entry carries three parallel, per-state lists:
#   "tt"       - one row per state (NOTE(review): presumably a transition
#                table indexed [state][action]; confirm against Puzzle)
#   "features" - the feature vector shown in each state
#   "rewards"  - the reward associated with each state
# ---------------------------------------------------------------------------

unambiguous_puzzle_spec = {
    "puzzle_set_description": "Unambiguous puzzle set with 1 good, 1 bad puzzle",
    "puzzles": [
        {
            "puzzle_description": "Appetitive puzzle",
            "tt": [
                [0, 0, 2],  # state 0: first presentation
                [0, 0, 0],  # state 1: getting passed over (placeholder)
                [2, 2, 2],  # state 2: consumed (saturating)
            ],
            "features": [
                [2],  # state 0: Green
                [2],  # state 1: Green (placeholder)
                [0],  # state 2: Empty/Unknown (after being eaten)
            ],
            "rewards": [
                -1,  # state 0: first look
                -1,  # state 1: proceeding to next puzzle (placeholder)
                1,   # state 2: consume (reward)
            ],
        },
        {
            "puzzle_description": "Aversive puzzle",
            "tt": [
                [0, 0, 2],  # state 0: first presentation
                [0, 0, 0],  # state 1: getting passed over (placeholder)
                [2, 2, 2],  # state 2: consumed (saturating)
            ],
            "features": [
                [1],  # state 0: Red
                [1],  # state 1: Red (placeholder)
                [0],  # state 2: Empty/Unknown (after being eaten)
            ],
            "rewards": [
                -1,  # state 0: first look
                -1,  # state 1: proceeding to next puzzle (placeholder)
                -2,  # state 2: consume (punishment)
            ],
        },
    ],
}

# Same structure as above, but both puzzles show the same (Red) feature, so
# the feature alone does not distinguish the rewarding puzzle from the
# punishing one.
ambiguous_puzzle_spec = {
    "puzzle_set_description": "Ambiguous puzzle set with 1 good, 1 bad puzzle.",
    "puzzles": [
        {
            "puzzle_description": "Appetitive puzzle",
            "tt": [
                [0, 0, 2],  # state 0: first presentation
                [0, 0, 0],  # state 1: getting passed over (placeholder)
                [2, 2, 2],  # state 2: consumed (saturating)
            ],
            "features": [
                [1],  # state 0: Red
                [1],  # state 1: Red (placeholder)
                [0],  # state 2: Empty/Unknown (after being eaten)
            ],
            "rewards": [
                -1,  # state 0: first look
                -1,  # state 1: proceeding to next puzzle (placeholder)
                1,   # state 2: consume (reward)
            ],
        },
        {
            "puzzle_description": "Aversive puzzle",
            "tt": [
                [0, 0, 2],  # state 0: first presentation
                [0, 0, 0],  # state 1: getting passed over (placeholder)
                [2, 2, 2],  # state 2: consumed (saturating)
            ],
            "features": [
                [1],  # state 0: Red
                [1],  # state 1: Red (placeholder)
                [0],  # state 2: Empty/Unknown (after being eaten)
            ],
            "rewards": [
                -1,  # state 0: first look
                -1,  # state 1: proceeding to next puzzle (placeholder)
                -2,  # state 2: consume (punishment)
            ],
        },
    ],
}

# Lookup from the spec names used in a schedule's "specs" lists to the
# spec dictionaries themselves.
specdict = {
    'unambiguous_puzzle_spec': unambiguous_puzzle_spec,
    'ambiguous_puzzle_spec': ambiguous_puzzle_spec,
}

# Presentation schedule: an ordered list of puzzle-set phases.
exp_schedule = {
    "setlist": [
        {
            "desc": "Initial puzzle set",
            "specs": ['unambiguous_puzzle_spec'],
            # How many turns for 'lifetime learning'.
            # Needs to be passed to the agent.
            "turns": 50,
            # How many puzzles? Or how many different features?
            # Might just be number of 'features' in puzzle spec.
            # We do not need to manually specify puzzle feature number.
            "num_stimuli": 6,
            "sequence_type": "fixed",
            "probs": [[1.0], [1.0]],
        },
        {
            "desc": "Stochastic puzzle sets",
            "specs": ['unambiguous_puzzle_spec', 'ambiguous_puzzle_spec'],
            "turns": 200,
            "num_stimuli": 6,
            "sequence_type": "stochastic",
            "probs": [[1.0, 0.0], [0.0, 1.0]],
        },
    ],
}
|
|
|
|
|
|
def make_puzzle_list(*args, **kwargs):
    """Validate the inputs for building a puzzle list from a schedule.

    NOTE: this function is an unfinished stub. It checks its arguments and
    walks the schedule, but the per-set branches are still empty, so no
    puzzles are produced and the function returns None.

    Keyword Args:
        specdict: required; fetched from kwargs but not otherwise used yet.
        schedule: required; a mapping with a 'setlist' entry. Every item of
            that list must provide the keys 'specs', 'num_stimuli', 'turns',
            'sequence_type' and 'probs'.

    Raises:
        AssertionError: if 'specdict' or 'schedule' is not supplied. Each
            missing name is printed first.
        KeyError: if 'setlist' or one of the per-set keys is absent.
    """
    # Sanity checks
    req_params = ('specdict', 'schedule')
    missing = [rpi for rpi in req_params if rpi not in kwargs]
    for rpi in missing:
        print("make_puzzle_list missing", rpi)
    if missing:
        # Raised explicitly rather than via an `assert` statement: asserts are
        # stripped when Python runs with -O, which would let a missing
        # parameter slip through to an unrelated KeyError below. The exception
        # type and message are the same as the assert produced.
        raise AssertionError("Error: Missing a required parameter. Quitting.")

    specdict = kwargs['specdict']
    schedule = kwargs['schedule']

    # Scaffolding for the unfinished implementation (currently unused).
    puzzles = []
    upress = RESS()  # Random Equal Stimulus Sets instance

    for seti in schedule['setlist']:
        # Reading every field up front means a malformed schedule entry fails
        # here with a KeyError even though the values are not used yet.
        num_sets = len(seti['specs'])
        num_stimuli = seti['num_stimuli']
        num_turns = seti['turns']
        seqtype = seti['sequence_type']
        probs = seti['probs']

        if num_sets == 1:
            # TODO: simple case -- just repeat the puzzle num_stimuli times.
            pass
        else:
            # TODO: multiple puzzle sets -- not implemented.
            pass
|
|
|
|
|
|
|
|
|
|
def exp1_environment(*args, **kwargs):
    """Build the conveyor-belt environment used by experiment 1.

    A RESS instance supplies a series of puzzle indices; one Puzzle is
    instantiated per entry of that series from the unambiguous puzzle spec
    defined below, and the puzzles are appended in order to a new ConvBelt.
    Several diagnostic values are printed along the way.

    Keyword Args:
        num_puzzles_on_belt: first argument handed to RESS.newress (the
            second is the number of puzzle definitions). Defaults to 6.

    Returns:
        The ConvBelt instance with all puzzles appended.
    """
    num_puzzles_on_belt = kwargs.get('num_puzzles_on_belt', 6)

    # Only the unambiguous set is used here. The ambiguous set and the
    # presentation schedule are defined at module level (ambiguous_puzzle_spec,
    # exp_schedule); the unused local copies that used to live in this
    # function have been removed.
    #
    # Notion: Have an object to define a schedule of presentation of
    # environments, with the ability to stochastically present one of
    # a list of environments.
    puzzle_spec = {
        "puzzle_set_description": "Unambiguous puzzle set with 1 good, 1 bad puzzle",
        "puzzles": [
            {
                "puzzle_description": "Appetitive puzzle",
                "tt": [[0, 0, 2],   # state 0: first presentation
                       [0, 0, 0],   # state 1: getting passed over (placeholder)
                       [2, 2, 2]],  # state 2: consumed (saturating)
                "features": [[2],   # state 0: Green
                             [2],   # state 1: Green (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,     # state 0: first look
                            -1,     # state 1: proceeding to next puzzle (placeholder)
                            1],     # state 2: consume (reward)
            },
            {
                "puzzle_description": "Aversive puzzle",
                "tt": [[0, 0, 2],   # state 0: first presentation
                       [0, 0, 0],   # state 1: getting passed over (placeholder)
                       [2, 2, 2]],  # state 2: consumed (saturating)
                "features": [[1],   # state 0: Red
                             [1],   # state 1: Red (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,     # state 0: first look
                            -1,     # state 1: proceeding to next puzzle (placeholder)
                            -2],    # state 2: consume (punishment)
            },
        ],
    }
    # Named puzzle_defs/puzzle_spec rather than `pz` so the module alias
    # `pz` (environments.puzzle) is not shadowed inside this function.
    puzzle_defs = puzzle_spec['puzzles']

    maxrewards = [1]

    # Produce Gellermann sequence
    upress = RESS()
    print(dir(upress))
    print(puzzle_defs)
    print(len(puzzle_defs))
    upseries = upress.newress(num_puzzles_on_belt, len(puzzle_defs))
    print("upseries", upseries)

    # Create puzzle sequence
    # call to make_puzzle_list goes about here

    # Instantiate puzzles per Gellermann sequence
    puzzles = []
    for stimi in upseries:
        chosen = puzzle_defs[int(stimi)]
        puzzles.append(Puzzle(tt=np.array(chosen['tt']),
                              features=chosen['features'],
                              rewards=chosen['rewards']))

    # Create conveyor belt
    world = ConvBelt(actionSpace=getActionSpace(puzzles),
                     observationSpace=getObservationSpace(puzzles),
                     maxRewards=maxrewards,
                     agentclass=Agent,
                     randomize=False, alpha=0.005)

    # Add puzzles
    for puzzle in puzzles:
        world.append(puzzle)

    return world
|
|
|
|
|
|
|
|
def do_experiment():
    """Configure an Experiment step by step and run its evolver driver.

    Each configuration step is announced with a print before it is executed.
    The driver is started only when the experiment validates; otherwise a
    failure message is printed instead. Returns None in both cases.
    """
    # Experiment instance
    print('creating myexp')
    experiment = Experiment()

    print('setting agentclass')
    experiment.set_agentclass(Agent)

    print('setting environclass')
    experiment.set_environclass(ConvBelt)

    print('setting evolverclass')
    experiment.set_evolverclass(EvolveWeights)

    print('setting evolver_attributes')
    experiment.set_evolver_attributes()  # defaults

    print('setting environ_maker')
    experiment.set_environ_maker(exp1_environment)  # registers the function

    print('making environment')
    experiment.make_environ()  # calls the registered function

    print('making evolver_instance')
    experiment.make_evolver_instance()

    if not experiment.validate():
        print("Experiment failed to validate.")
        return

    print('running driver')
    experiment.evolver.driver()
|
|
|
|
|
|
|
|
# Script entry point: run the experiment only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":

    print("exp1.py start...")

    # Configures and runs the whole experiment (see do_experiment).
    do_experiment()

    print("exp1.py done.")
|