"""Experiment 1 driver.

Builds a conveyor-belt (``ConvBelt``) world populated with ``Puzzle``
instances drawn from a puzzle specification, then hands that environment
maker to an ``Experiment`` which runs the ``EvolveWeights`` driver.
"""

import sys
# allow importing from the 'code/' dir
sys.path.append("../code")

import os
import platform
import pickle
import json
import traceback
import datetime
import copy
import numpy as np  # , itertools, copy
import matplotlib.pyplot as plt
from collections import defaultdict

import importlib  # module reloading
import environments
import agents
# always forces a reload in case you have edited environments or agents
importlib.reload(environments)
importlib.reload(agents)
#from environments.gridworld import GridWorld
import environments.puzzle as pz
from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace
from agents.q_agent import EvolvableAgent as Agent

# DEAP imports
import random
from deap import creator, base, tools, algorithms

import multiprocessing
#pool = multiprocessing.Pool()
#toolbox.register("map", pool.map)

# Weight handling
from mda import MultiDimArray

# RESS
from ress import RESS

# EvolveWeights
# from ew import EvolveWeights
from curio_evolve_weights import EvolveWeights

# Experiment
from curio_experiment import Experiment


def isotime() -> str:
    """Return the current local time as an ISO 8601 string."""
    return datetime.datetime.now().isoformat()


def t2fn(timestamp: str) -> str:
    """Return *timestamp* with every '.' and ':' replaced by '_'."""
    return timestamp.replace('.', '_').replace(':', '_')


class Holder(object):
    """Empty class; instances can carry arbitrary attributes."""

    def __init__(self):
        pass


# ---------------------------------------------------------------------------
# Puzzle specifications
#
# Each puzzle has three states; "tt", "features" and "rewards" are indexed by
# state:
#   state 0: first presentation
#   state 1: getting passed over / proceeding to next puzzle (placeholder)
#   state 2: consumed (saturating)
# Feature codes used below: 2 = Green, 1 = Red,
# 0 = Empty/Unknown (after being eaten).
# ---------------------------------------------------------------------------

unambiguous_puzzle_spec = {
    "puzzle_set_description": "Unambiguous puzzle set with 1 good, 1 bad puzzle",
    "puzzles": [
        {
            "puzzle_description": "Appetitive puzzle",
            "tt": [[0, 0, 2],   # state 0: first presentation
                   [0, 0, 0],   # state 1: getting passed over (placeholder)
                   [2, 2, 2]],  # state 2: consumed (saturating)
            "features": [[2],   # state 0: Green
                         [2],   # state 1: Green (placeholder)
                         [0]],  # state 2: Empty/Unknown (after being eaten)
            "rewards": [-1,     # state 0: first look
                        -1,     # state 1: proceeding to next puzzle (placeholder)
                        1],     # state 2: consume (reward)
        },
        {
            "puzzle_description": "Aversive puzzle",
            "tt": [[0, 0, 2],   # state 0: first presentation
                   [0, 0, 0],   # state 1: getting passed over (placeholder)
                   [2, 2, 2]],  # state 2: consumed (saturating)
            "features": [[1],   # state 0: Red
                         [1],   # state 1: Red (placeholder)
                         [0]],  # state 2: Empty/Unknown (after being eaten)
            "rewards": [-1,     # state 0: first look
                        -1,     # state 1: proceeding to next puzzle (placeholder)
                        -2],    # state 2: consume (punishment)
        },
    ]
}

ambiguous_puzzle_spec = {
    "puzzle_set_description": "Ambiguous puzzle set with 1 good, 1 bad puzzle.",
    "puzzles": [
        {
            "puzzle_description": "Appetitive puzzle",
            "tt": [[0, 0, 2],   # state 0: first presentation
                   [0, 0, 0],   # state 1: getting passed over (placeholder)
                   [2, 2, 2]],  # state 2: consumed (saturating)
            "features": [[1],   # state 0: Red
                         [1],   # state 1: Red (placeholder)
                         [0]],  # state 2: Empty/Unknown (after being eaten)
            "rewards": [-1,     # state 0: first look
                        -1,     # state 1: proceeding to next puzzle (placeholder)
                        1],     # state 2: consume (reward)
        },
        {
            "puzzle_description": "Aversive puzzle",
            "tt": [[0, 0, 2],   # state 0: first presentation
                   [0, 0, 0],   # state 1: getting passed over (placeholder)
                   [2, 2, 2]],  # state 2: consumed (saturating)
            "features": [[1],   # state 0: Red
                         [1],   # state 1: Red (placeholder)
                         [0]],  # state 2: Empty/Unknown (after being eaten)
            "rewards": [-1,     # state 0: first look
                        -1,     # state 1: proceeding to next puzzle (placeholder)
                        -2],    # state 2: consume (punishment)
        },
    ]
}

# Lookup from the spec names used in a schedule's "specs" lists to the specs.
specdict = {
    'unambiguous_puzzle_spec': unambiguous_puzzle_spec,
    'ambiguous_puzzle_spec': ambiguous_puzzle_spec,
}

# Notion: Have an object to define a schedule of presentation of
# environments, with the ability to stochastically present one of
# a list of environments.
exp_schedule = {
    "setlist": [
        {
            "desc": "Initial puzzle set",
            "specs": ['unambiguous_puzzle_spec'],
            "turns": 50,  # How many turns for 'lifetime learning'
                          # Needs to be passed to the agent
            "num_stimuli": 6,  # How many puzzles? Or how many different features?
                               # Might just be number of 'features' in puzzle spec
                               # We do not need to manually specify puzzle feature number
            "sequence_type": "fixed",
            "probs": [[1.0], [1.0]]
        },
        {
            "desc": "Stochastic puzzle sets",
            "specs": ['unambiguous_puzzle_spec', 'ambiguous_puzzle_spec'],
            "turns": 200,
            "num_stimuli": 6,
            "sequence_type": "stochastic",
            "probs": [[1.0, 0.0], [0.0, 1.0]]
        },
    ]
}


def make_puzzle_list(*args, **kwargs):
    """Validate a puzzle schedule (unfinished stub).

    Keyword Args:
        specdict: mapping from spec name to puzzle specification.
        schedule: dict with a "setlist" list; each entry must provide
            "specs", "num_stimuli", "turns" and "sequence_type", and may
            provide "probs".

    Returns:
        None. The puzzle-construction branches are still placeholders, so
        no puzzle list is produced yet.

    Raises:
        AssertionError: if 'specdict' or 'schedule' is not supplied.
        KeyError: if a schedule entry lacks one of its required keys.
    """
    # Sanity checks: report every missing parameter before failing.
    req_params = ['specdict', 'schedule']
    missing = [rpi for rpi in req_params if rpi not in kwargs]
    for rpi in missing:
        print("make_puzzle_list missing", rpi)
    if missing:
        # Raised explicitly (rather than via `assert`) so the check is not
        # stripped under `python -O`; the exception type is unchanged.
        raise AssertionError("Error: Missing a required parameter. Quitting.")

    specdict = kwargs['specdict']
    schedule = kwargs['schedule']

    puzzles = []
    upress = RESS()  # Random Equal Stimulus Sets instance

    for seti in schedule['setlist']:
        num_sets = len(seti['specs'])
        num_stimuli = seti['num_stimuli']
        num_turns = seti['turns']
        seqtype = seti['sequence_type']
        probs = seti.get('probs')  # tolerated as absent; not used yet

        if 1 == num_sets:
            # Simple, just repeat the puzzle num_stimuli * times
            # TODO: not implemented yet.
            pass
        else:
            # TODO: multi-spec (stochastic) case not implemented yet.
            pass


def exp1_environment(*args, **kwargs):
    """Build the conveyor-belt world used by experiment 1.

    Keyword Args:
        num_puzzles_on_belt: number of puzzles to place on the belt
            (default 6).

    Returns:
        A ``ConvBelt`` populated with ``Puzzle`` instances created from
        ``unambiguous_puzzle_spec`` in the order given by a ``RESS``
        sequence.
    """
    num_puzzles_on_belt = kwargs.get('num_puzzles_on_belt', 6)

    # Work on a private copy so every call starts from pristine data and
    # nothing downstream can alter the module-level specification.
    puzzle_spec = copy.deepcopy(unambiguous_puzzle_spec)
    puzzle_defs = puzzle_spec['puzzles']

    maxrewards = [1]

    # Produce Gellermann sequence
    upress = RESS()
    # Diagnostic output, retained from the original script.
    print(dir(upress))
    print(puzzle_defs)
    print(len(puzzle_defs))
    upseries = upress.newress(num_puzzles_on_belt, len(puzzle_defs))
    print("upseries", upseries)

    # Create puzzle sequence
    # call to make_puzzle_list goes about here

    # Instantiate puzzles per Gellermann sequence
    puzzles = []
    for stimi in upseries:
        puzzle_def = puzzle_defs[int(stimi)]
        puzzles.append(
            Puzzle(tt=np.array(puzzle_def['tt']),
                   features=puzzle_def['features'],
                   rewards=puzzle_def['rewards'])
        )

    # Create conveyor belt
    world = ConvBelt(actionSpace=getActionSpace(puzzles),
                     observationSpace=getObservationSpace(puzzles),
                     maxRewards=maxrewards,
                     agentclass=Agent,
                     randomize=False,
                     alpha=0.005)

    # Add puzzles
    for puzzle in puzzles:
        world.append(puzzle)

    return world


def do_experiment():
    """Configure an ``Experiment`` and run its evolver driver if it validates."""
    # Experiment instance
    print('creating myexp')
    myexp = Experiment()
    print('setting agentclass')
    myexp.set_agentclass(Agent)
    print('setting environclass')
    myexp.set_environclass(ConvBelt)
    print('setting evolverclass')
    myexp.set_evolverclass(EvolveWeights)
    print('setting evolver_attributes')
    myexp.set_evolver_attributes()  # defaults
    print('setting environ_maker')
    myexp.set_environ_maker(exp1_environment)  # sets function
    print('making environment')
    myexp.make_environ()  # Calls function
    print('making evolver_instance')
    myexp.make_evolver_instance()

    if myexp.validate():
        print('running driver')
        myexp.evolver.driver()
    else:
        print("Experiment failed to validate.")


if __name__ == "__main__":
    print("exp1.py start...")
    do_experiment()
    print("exp1.py done.")