alice/code/exp1.py

"""
exp1.py - instance of use of 'experiment.py'

Tasks:

  - Consider how to have a changing schedule of stimulus presentation

Need to have something where we can see evolution producing a trait that
would indicate interest in new things in the environment. Sets up conditions
where curiosity could be advantageous.

Conveyor belt needs to have the ability to introduce new things.

Single factor shift to start -- color of the thing ?
The introduction of novelty is the main thing, where the novelty is
associated with fitness advantage.

Simple systems to test
  - constant environment
  - switch between two different environments
    - frequency of shift makes a difference
    - Goldilocks zone for intermediate frequency
Controlled randomization
 - Known low-payoff 'food' in environment
 - Better thing has a cue
   - Changing frequency of presentation
     - Constant
     - Ramp
     - Cycle
   - 'Green' could indicate better but
     - x factor for better could be changed

For all of these, we can test unseen (novel) stimuli
 - Generalization can be tested
   - Cue of goodness
 - Proportion of time novel stimuli are rewarding
   - Must be a proportion to introduce unpredictability

One hypothesis: unpredictability between cues and rewards may lead to curiosity
 - Evolutionary timescale of unpredictability
 - Predictable lifetime

Push current code to repository.


"""


import sys
# allow importing from the 'code/' dir
sys.path.append("../code")

import os
import platform
import pickle
import json
import traceback
import datetime
import copy

import numpy as np # , itertools, copy
import matplotlib.pyplot as plt
from collections import defaultdict
import importlib  # module reloading

import environments
import agents

# always forces a reload in case you have edited environments or agents
importlib.reload(environments)
importlib.reload(agents)
#from environments.gridworld import GridWorld
import environments.puzzle as pz
from environments.puzzle import Puzzle, ConvBelt, getActionSpace, getObservationSpace

from agents.q_agent import EvolvableAgent as Agent

# DEAP imports

import random
from deap import creator, base, tools, algorithms

import multiprocessing

#pool = multiprocessing.Pool()
#toolbox.register("map", pool.map)

# Weight handling
from mda import MultiDimArray

# RESS
from ress import RESS

# EvolveWeights
# from ew import EvolveWeights
from curio_evolve_weights import EvolveWeights

# Experiment
from experiment import Experiment

def isotime():
    """
    Return the current time as an ISO 8601 formatted string.

    The value comes from datetime.datetime.now() called without a timezone,
    so the result is a naive timestamp with no UTC offset suffix.
    """
    now = datetime.datetime.now()
    return now.isoformat()

def t2fn(timestamp):
    """
    Convert a timestamp string into a form usable in a file name.

    Every '.' and ':' character is replaced with an underscore; all other
    characters are returned unchanged.
    """
    return timestamp.replace('.', '_').replace(':', '_')

class Holder:
    """
    An open-ended attribute container (a "digital duffle bag").

    Instances start out empty and accept any attribute assigned to them
    afterwards, which is the opposite of restricting a class with the
    __slots__ convention.

    Handy as a context object; with some care over what gets stored in it,
    the whole object can be serialized to disk and loaded again later.
    """

    def __init__(self):
        """Create an empty holder; attributes are added by the caller."""

"""
Probability of reward at all
Probability of strength of reward

Variances:
 - How many puzzle cues do we have?
 - How often does a puzzle appear in training?
 - How often does a puzzle appear across evolutionary time?
 - How much reward does solving a puzzle deliver?

Two things , green | red
 green good
 red bad

Outcomes
 - Too unlikely -> no behavior to examine
 - Entirely predictable
 - In between -> curiosity has advantage

 First sample from uniform distribution to determine reward (0.5)
 Second: strength of reward in conjunction with probability of reward (small freq but large reward, etc.)

Spot or range where it becomes advantageous to evolve a curiosity module...


Figuring out a representation that allows all the flexibility we discussed...

        "puzzles": [
            {
                "puzzle_description": "Appetitive puzzle",
                "tt": [[0,0,2],  # state 0: first presentation
                       [0,0,0],  # state 1: getting passed over (placeholder)
                       [2,2,2]], # state 2: consumed (saturating)
                "features": [[2], # state 0: Green
                             [2], # state 1: Green (placeholder)
                             [0]], # state 2: Empty/Unknown (after being eaten)
                "rewards": [
                            [-1, # state 0: first look
                            -1, # state 1: proceeding to next puzzle (placeholder)
                            1,   # state 2: consume (reward)
                            0.5  # Proportion
                            ],
                            [-1, # state 0: first look
                            -1, # state 1: proceeding to next puzzle (placeholder)
                            -1,   # state 2: consume (punishment)
                            0.5  # Proportion
                            ],
                          ]
            },
            {
                "puzzle_description": "Aversive puzzle",
                "tt": [[0,0,2],  # state 0: first presentation
                       [0,0,0],  # state 1: getting passed over (placeholder)
                       [2,2,2]], # state 2: consumed (saturating)
                "features": [[1],  # state 0: Red
                             [1],  # state 1: Red (placeholder)
                             [0]], # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,  # state 0: first look
                            -1,  # state 1: proceeding to next puzzle (placeholder)
                            -2], # state 2: consume (punishment)
            },


"""

def exp1_environment(*args, **kwargs):
    """
    Build the conveyor-belt environment used by experiment 1.

    A puzzle sequence is requested from RESS, one Puzzle is instantiated per
    entry of that sequence from the unambiguous puzzle specification, and the
    puzzles are appended to a newly created ConvBelt.

    Positional arguments are accepted but ignored.

    Keyword arguments:
      num_puzzles_on_belt -- number of puzzles to place on the belt.
                             Defaults to 6 when not supplied.

    Returns the populated ConvBelt instance.
    """

    unambiguous_puzzle_spec = {
        "puzzle_set_description": "Unambiguous puzzle set with 1 good, 1 bad puzzle",
        "puzzles": [
            {
                "puzzle_description": "Appetitive puzzle",
                "tt": [[0,0,2],  # state 0: first presentation
                       [0,0,0],  # state 1: getting passed over (placeholder)
                       [2,2,2]], # state 2: consumed (saturating)
                "features": [[2], # state 0: Green
                             [2], # state 1: Green (placeholder)
                             [0]], # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1, # state 0: first look
                            -1, # state 1: proceeding to next puzzle (placeholder)
                            1],  # state 2: consume (reward)
            },
            {
                "puzzle_description": "Aversive puzzle",
                "tt": [[0,0,2],  # state 0: first presentation
                       [0,0,0],  # state 1: getting passed over (placeholder)
                       [2,2,2]], # state 2: consumed (saturating)
                "features": [[1],  # state 0: Red
                             [1],  # state 1: Red (placeholder)
                             [0]], # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,  # state 0: first look
                            -1,  # state 1: proceeding to next puzzle (placeholder)
                            -2], # state 2: consume (punishment)
            },
        ]
    }

    # NOTE: ambiguous_puzzle_spec and exp_schedule are defined for the planned
    # schedule-driven presentation but are not yet consumed by the code below.
    ambiguous_puzzle_spec = {
        "puzzle_set_description": "Ambiguous puzzle set with 1 good, 1 bad puzzle.",
        "puzzles": [
            {
                "puzzle_description": "Appetitive puzzle",
                "tt": [[0,0,2],  # state 0: first presentation
                       [0,0,0],  # state 1: getting passed over (placeholder)
                       [2,2,2]], # state 2: consumed (saturating)
                "features": [[1], # state 0: Red
                             [1], # state 1: Red (placeholder)
                             [0]], # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1, # state 0: first look
                            -1, # state 1: proceeding to next puzzle (placeholder)
                            1],  # state 2: consume (reward)
            },
            {
                "puzzle_description": "Aversive puzzle",
                "tt": [[0,0,2],  # state 0: first presentation
                       [0,0,0],  # state 1: getting passed over (placeholder)
                       [2,2,2]], # state 2: consumed (saturating)
                "features": [[1],  # state 0: Red
                             [1],  # state 1: Red (placeholder)
                             [0]], # state 2: Empty/Unknown (after being eaten)
                "rewards": [-1,  # state 0: first look
                            -1,  # state 1: proceeding to next puzzle (placeholder)
                            -2], # state 2: consume (punishment)
            },
        ]
    }
    # Notion: Have an object to define a schedule of presentation of
    # environments, with the ability to stochastically present one of
    # a list of environments.
    exp_schedule = {
        "setlist": [
            {
                "desc": "Initial puzzle set",
                "specs": [unambiguous_puzzle_spec],
                "turns": 50,
                "num_stimuli": 6,
                "sequence_type": "fixed",
                "probs": [[1.0], [1.0]]
            },
            {
                "desc": "Stochastic puzzle sets",
                "specs": [unambiguous_puzzle_spec, ambiguous_puzzle_spec],
                "turns": 200,
                "num_stimuli": 6,
                "sequence_type": "stochastic",
                "probs": [[1.0, 0.0], [0.0, 1.0]]
            },
        ]
    }

    # Honor the caller's value when given; fall back to 6 otherwise. The
    # name is always bound, so the sequence request below cannot fail with
    # an UnboundLocalError when the keyword is omitted.
    num_puzzles_on_belt = kwargs.get('num_puzzles_on_belt', 6)

    # Named puzzle_spec (not 'pz') to avoid shadowing the module-level
    # 'environments.puzzle as pz' alias.
    puzzle_spec = unambiguous_puzzle_spec
    puzzle_defs = puzzle_spec['puzzles']

    maxrewards = [1]

    # Produce Gellermann sequence
    upress = RESS()
    print(dir(upress))
    print(puzzle_defs)
    print(len(puzzle_defs))
    upseries = upress.newress(num_puzzles_on_belt, len(puzzle_defs))
    print("upseries", upseries)

    # Create puzzle sequence
    # Instantiate puzzles per Gellermann sequence
    puzzles = []
    for stimi in upseries:
        # Each sequence entry is converted to an int index into puzzle_defs.
        puzzle_def = puzzle_defs[int(stimi)]
        puzzles.append(Puzzle(tt=np.array(puzzle_def['tt']),
                              features=puzzle_def['features'],
                              rewards=puzzle_def['rewards']))

    # Create conveyor belt
    world = ConvBelt(actionSpace = getActionSpace(puzzles),
                     observationSpace = getObservationSpace(puzzles),
                     maxRewards = maxrewards,
                     agentclass=Agent,
                     randomize = False, alpha=0.005)

    # Add puzzles
    for puzzle in puzzles:
        world.append(puzzle)

    return world

def do_experiment():
    """
    Configure and run experiment 1.

    An Experiment is wired up with the agent, environment, and evolver
    classes, given exp1_environment as its environment maker, and asked to
    build its environment and evolver instance. If the experiment validates,
    the evolver's driver is run; otherwise a failure message is printed.
    """
    experiment = Experiment()

    # Classes the experiment is built from
    experiment.set_agentclass(Agent)
    experiment.set_environclass(ConvBelt)
    experiment.set_evolverclass(EvolveWeights)

    # Evolver attributes are left at their defaults
    experiment.set_evolver_attributes()

    # Register the environment factory, then have the experiment call it
    experiment.set_environ_maker(exp1_environment)
    experiment.make_environ()

    experiment.make_evolver_instance()

    if not experiment.validate():
        print("Experiment failed to validate.")
        return

    experiment.evolver.driver()


if __name__ == "__main__":
    # Script entry point: the experiment runs only when this file is executed
    # directly, not when it is imported as a module.

    print("exp1.py start...")


    do_experiment()

    print("exp1.py done.")