"""
exp1.py - instance of use of 'experiment.py'

Tasks:

- Consider how to have a changing schedule of stimulus presentation

  Need to have something where we can see evolution producing a
  trait that would indicate interest in new things in the
  environment.

  Sets up conditions where curiosity could be advantageous.

  Conveyor belt needs to have the ability to introduce new things.

  Single factor shift to start
  -- color of the thing ?

  The introduction of novelty is the main thing, where the novelty
  is associated with fitness advantage.

  Simple systems to test
  - constant environment
  - switch between two different environments
    - frequency of shift makes a difference
    - Goldilocks zone for intermediate frequency

  Controlled randomization
  - Known low-payoff 'food' in environment
  - Better thing has a cue
  - Changing frequency of presentation
    - Constant
    - Ramp
    - Cycle
  - 'Green' could indicate better but
    - x factor for better could be changed

  For all of these, we can test unseen (novel) stimuli
  - Generalization can be tested
    - Cue of goodness
    - Proportion of time novel stimuli are rewarding
      - Must be a proportion to introduce unpredictability

  One hypothesis: unpredictability between cues and rewards may lead
  to curiosity
  - Evolutionary timescale of unpredictability
  - Predictable lifetime

Push current code to repository.
"""

import sys
# allow importing from the 'code/' dir
sys.path.append("../code")

import os
import platform
import pickle
import json
import traceback
import datetime
import copy
import numpy as np  # , itertools, copy
import matplotlib.pyplot as plt
from collections import defaultdict

import importlib  # module reloading

import environments
import agents
# always forces a reload in case you have edited environments or agents
importlib.reload(environments)
importlib.reload(agents)
#from environments.gridworld import GridWorld
import environments.puzzle as pz
from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace

from agents.q_agent import EvolvableAgent as Agent

# DEAP imports
import random
from deap import creator, base, tools, algorithms

import multiprocessing

#pool = multiprocessing.Pool()
#toolbox.register("map", pool.map)

# Weight handling
from mda import MultiDimArray

# RESS
from ress import RESS

# EvolveWeights
# from ew import EvolveWeights
from curio_evolve_weights import EvolveWeights

# Experiment
from experiment import Experiment


def isotime() -> str:
    """Return the current local time as an ISO 8601 formatted string."""
    return datetime.datetime.now().isoformat()


def t2fn(timestamp: str) -> str:
    """Return `timestamp` with every '.' and ':' replaced by '_'.

    The name suggests "timestamp to filename"; the function itself only
    performs the two character substitutions.
    """
    return timestamp.replace('.', '_').replace(':', '_')


class Holder(object):
    """
    A general class for the equivalent of a digital duffle bag, each
    instance can have essentially whatever you want stuffed into it.
    This is essentially the very opposite of defining classes with
    the __slots__ convention, leaving the contents entirely open.

    I've found this useful for making context objects. If I am careful,
    the whole object can be serialized to disk and loaded later.
    """

    def __init__(self):
        # Intentionally empty: attributes are attached ad hoc by users.
        pass


"""
Probability of reward at all
Probability of strength of reward

Variances:
- How many puzzle cues do we have?
- How often does a puzzle appear in training?
- How often does a puzzle appear across evolutionary time?
- How much reward does solving a puzzle deliver?

Two things , green | red
green good
red bad

Outcomes
- Too unlikely -> no behavior to examine
- Entirely predictable
- In between -> curiosity has advantage

First sample from uniform distribution to determine reward (0.5)
Second : strength of reward in conjunction with probability of reward
(small freq but large reward, etc.)

Spot or range where it becomes advantageous to evolve a curiosity module...

Figuring out a representation that allows all the flexibility we discussed...

    "puzzles": [
        {
            "puzzle_description": "Appetitive puzzle",
            "tt": [[0,0,2],   # state 0: first presentation
                   [0,0,0],   # state 1: getting passed over (placeholder)
                   [2,2,2]],  # state 2: consumed (saturating)
            "features": [[2],   # state 0: Green
                         [2],   # state 1: Green (placeholder)
                         [0]],  # state 2: Empty/Unknown (after being eaten)
            "rewards": [
                [-1,   # state 0: first look
                 -1,   # state 1: proceeding to next puzzle (placeholder)
                 1,    # state 2: consume (reward)
                 0.5   # Proportion
                 ],
                [-1,   # state 0: first look
                 -1,   # state 1: proceeding to next puzzle (placeholder)
                 -1,   # state 2: consume (punishment)
                 0.5   # Proportion
                 ],
            ]
        },
        {
            "puzzle_description": "Aversive puzzle",
            "tt": [[0,0,2],   # state 0: first presentation
                   [0,0,0],   # state 1: getting passed over (placeholder)
                   [2,2,2]],  # state 2: consumed (saturating)
            "features": [[1],   # state 0: Red
                         [1],   # state 1: Red (placeholder)
                         [0]],  # state 2: Empty/Unknown (after being eaten)
            "rewards": [-1,   # state 0: first look
                        -1,   # state 1: proceeding to next puzzle (placeholder)
                        -2],  # state 2: consume (punishment)
        },
"""


def exp1_environment(*args, **kwargs):
    """Build the conveyor-belt world used by experiment 1.

    Puzzles are instantiated from the unambiguous puzzle spec in the
    order given by ``RESS().newress(...)`` (described in the original
    comments as a Gellermann sequence) and appended to a ``ConvBelt``.

    Keyword Args:
        num_puzzles_on_belt: Number of puzzles to place on the belt.
            Defaults to 6. The original code only bound this name when
            the keyword was supplied (and then ignored the supplied
            value), so calling without the keyword left the name
            unbound; the value is now read with a default instead.

    Positional arguments are accepted for interface compatibility and
    are not used.

    Returns:
        The populated ``ConvBelt`` instance.
    """
    unambiguous_puzzle_spec = {
        "puzzle_set_description": "Unambiguous puzzle set with 1 good, 1 bad puzzle",
        "puzzles": [
            {
                "puzzle_description": "Appetitive puzzle",
                "tt": [[0, 0, 2],   # state 0: first presentation
                       [0, 0, 0],   # state 1: getting passed over (placeholder)
                       [2, 2, 2]],  # state 2: consumed (saturating)
                "features": [[2],   # state 0: Green
                             [2],   # state 1: Green (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,   # state 0: first look
                            -1,   # state 1: proceeding to next puzzle (placeholder)
                            1],   # state 2: consume (reward)
            },
            {
                "puzzle_description": "Aversive puzzle",
                "tt": [[0, 0, 2],   # state 0: first presentation
                       [0, 0, 0],   # state 1: getting passed over (placeholder)
                       [2, 2, 2]],  # state 2: consumed (saturating)
                "features": [[1],   # state 0: Red
                             [1],   # state 1: Red (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,   # state 0: first look
                            -1,   # state 1: proceeding to next puzzle (placeholder)
                            -2],  # state 2: consume (punishment)
            },
        ]
    }

    # Both puzzles share the same (Red) cue, so the cue does not
    # distinguish reward from punishment.
    ambiguous_puzzle_spec = {
        "puzzle_set_description": "Ambiguous puzzle set with 1 good, 1 bad puzzle.",
        "puzzles": [
            {
                "puzzle_description": "Appetitive puzzle",
                "tt": [[0, 0, 2],   # state 0: first presentation
                       [0, 0, 0],   # state 1: getting passed over (placeholder)
                       [2, 2, 2]],  # state 2: consumed (saturating)
                "features": [[1],   # state 0: Red
                             [1],   # state 1: Red (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,   # state 0: first look
                            -1,   # state 1: proceeding to next puzzle (placeholder)
                            1],   # state 2: consume (reward)
            },
            {
                "puzzle_description": "Aversive puzzle",
                "tt": [[0, 0, 2],   # state 0: first presentation
                       [0, 0, 0],   # state 1: getting passed over (placeholder)
                       [2, 2, 2]],  # state 2: consumed (saturating)
                "features": [[1],   # state 0: Red
                             [1],   # state 1: Red (placeholder)
                             [0]],  # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,   # state 0: first look
                            -1,   # state 1: proceeding to next puzzle (placeholder)
                            -2],  # state 2: consume (punishment)
            },
        ]
    }

    # Notion: Have an object to define a schedule of presentation of
    # environments, with the ability to stochastically present one of
    # a list of environments.
    # NOTE(review): exp_schedule is defined but not consumed anywhere
    # in this function yet; it is kept as a record of the intended design.
    exp_schedule = {
        "setlist": [
            {
                "desc": "Initial puzzle set",
                "specs": [unambiguous_puzzle_spec],
                "turns": 50,
                "num_stimuli": 6,
                "sequence_type": "fixed",
                "probs": [[1.0], [1.0]]
            },
            {
                "desc": "Stochastic puzzle sets",
                "specs": [unambiguous_puzzle_spec, ambiguous_puzzle_spec],
                "turns": 200,
                "num_stimuli": 6,
                "sequence_type": "stochastic",
                "probs": [[1.0, 0.0], [0.0, 1.0]]
            },
        ]
    }

    # Always bind the belt length; honor the caller's value when given.
    num_puzzles_on_belt = kwargs.get('num_puzzles_on_belt', 6)

    # Named puzzle_spec (not 'pz') so the module alias
    # 'environments.puzzle as pz' is not shadowed inside this function.
    puzzle_spec = unambiguous_puzzle_spec
    spec_puzzles = puzzle_spec['puzzles']

    maxrewards = [1]

    # Produce Gellermann sequence
    upress = RESS()
    # Debug output retained from the original script.
    print(dir(upress))
    print(spec_puzzles)
    print(len(spec_puzzles))

    upseries = upress.newress(num_puzzles_on_belt, len(spec_puzzles))
    print("upseries", upseries)

    # Create puzzle sequence
    # Instantiate puzzles per Gellermann sequence
    puzzles = []
    for stimulus in upseries:
        # Each series element selects one puzzle definition by index.
        entry = spec_puzzles[int(stimulus)]
        puzzles.append(
            Puzzle(tt=np.array(entry['tt']),
                   features=entry['features'],
                   rewards=entry['rewards'])
        )

    # Create conveyor belt
    world = ConvBelt(actionSpace=getActionSpace(puzzles),
                     observationSpace=getObservationSpace(puzzles),
                     maxRewards=maxrewards,
                     agentclass=Agent,
                     randomize=False,
                     alpha=0.005)

    # Add puzzles
    for puzzle in puzzles:
        world.append(puzzle)

    return world


def do_experiment():
    """Configure an ``Experiment`` for exp1 and run its evolver.

    Wires the agent, environment and evolver classes into the
    experiment, registers ``exp1_environment`` as the environment
    maker, and runs ``evolver.driver()`` only when ``validate()``
    succeeds; otherwise a failure message is printed.
    """
    # Experiment instance
    myexp = Experiment()
    myexp.set_agentclass(Agent)
    myexp.set_environclass(ConvBelt)
    myexp.set_evolverclass(EvolveWeights)
    myexp.set_evolver_attributes()  # defaults

    myexp.set_environ_maker(exp1_environment)  # sets function
    myexp.make_environ()  # Calls function
    myexp.make_evolver_instance()

    if not myexp.validate():
        print("Experiment failed to validate.")
        return

    myexp.evolver.driver()


if __name__ == "__main__":
    print("exp1.py start...")

    do_experiment()

    print("exp1.py done.")