{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "b067867a-c1bc-4769-a6ac-15e7277ab8e2", "metadata": { "tags": [] }, "outputs": [], "source": [ "import numpy as np, itertools, copy\n", "import matplotlib.pyplot as plt\n", "from collections import defaultdict\n", "import importlib # module reloading\n", "\n", "# allow importing from the 'code/' dir\n", "import sys\n", "sys.path.append(\"../code\")\n", "\n", "import environments\n", "import agents\n", "# always forces a reload in case you have edited environments or agents\n", "importlib.reload(environments)\n", "importlib.reload(agents)\n", "from environments.puzzle import Puzzle, ConvBelt, Action, getActionSpace, getObservationSpace\n", "from agents.q_agent import Agent\n", "\n", "import copy # allows duplicating puzzles into unique puzzles, otherwise python refs are shallow-copied\n", "maxrewards = [1] # could have multiple levels of 'goodness'\n", "\n", "# Create a puzzle with 4 states:\n", "# state 0: first presentation\n", "# state 1: getting passed over, advancing on belt (not really a state, more a placeholder)\n", "# state 2: investigated (more sensory information is available when examined closely)\n", "# state 3: consumed (saturating state with possible reward)\n", "easy_puzzle_tt = np.array([[0,0,2,3], # state 0: first presentation\n", " [0,0,0,0], # state 1: getting passed over (placeholder)\n", " [2,0,2,3], # state 2: investigated\n", " [3,3,3,3]]) # state 3: consumed\n", "# example puzzle with 2 sensorial dimensions\n", "easy_puzzle_features = [[0,1], # state 0: Empty/Unknown & Spikes\n", " [0,1], # state 1: Empty/Unknown & Spikes\n", " [3,1], # state 2: Red & Spikes\n", " [0,0]] # state 3: Empty/Unknown & Empty/Unknown\n", "easy_puzzle_rewards = [-1, # state 0: first look\n", " -1, # state 1: proceeding to next puzzle (placeholder)\n", " -1, # state 2: investigate\n", " 1] # state 3: consume (could be -10 poisonous! 
or -1 empty/useless)\n", "p1 = Puzzle(tt = easy_puzzle_tt,\n", " features = easy_puzzle_features,\n", " rewards = easy_puzzle_rewards)\n", "p2 = copy.deepcopy(p1)\n", "puzzles = (p1,p2)\n", "\n", "\n", "obsSpace = getObservationSpace(puzzles)\n", "actSpace = getActionSpace(puzzles)\n", "\n", "\n", "env = ConvBelt(actionSpace = getActionSpace(puzzles), # indicate number of actions agent can take\n", " observationSpace = getObservationSpace(puzzles), # indicate number of sensorial dimensions and sizes\n", " maxRewards = maxrewards, # rewards that constitute postive rewards\n", " randomize = False, # randomize puzzle positions on belt at each reset()\n", " )\n", "\n", "# can use append() or extend()\n", "env.append(p1)\n", "env.append(p2)\n", "\n", "# domain-specific settings\n", "num_trials=200\n", "n_actions = 4\n", "#(optimal lmbda in the agent is domain dependent - could be evolved)\n", "HARD_TIME_LIMIT = 600\n", "#KILLED_REWARD = -10 # not used here\n", "#(standard reward) = -1.0 (means agent is potentially wasting time - set internal to agent code)\n", "#(goal reward) = 1.0 (means the agent achieved something good - set internal to agent code)\n", "\n", "agent = Agent(obsSpace=obsSpace, actSpace=actSpace, alpha=0.1, gamma=0.95, epsilon=0.01, lmbda=0.42)\n", "# alpha # how much to weigh reward surprises that deviate from expectation\n", "# gamma # how important exepcted rewards will be\n", "# epsilon # fraction of exploration to exploitation (how often to choose a random action)\n", "# lmbda # how slowly memory of preceeding actions fades away (1=never, 0=\n", "\n", "time_to_solve_each_trial = []\n", "rewards = []\n", "\n", "for trialN in range(num_trials):\n", " # some output to see it running\n", " if (trialN % 10) == 0: print('.',end='')\n", " # initialize the agent, environment, and time for this trial\n", " agent.reset() # soft-reset() (keeps learned weights)\n", " nextState = env.reset()\n", " time = 0\n", " while True:\n", " time += 1\n", " # set agent senses based on environment and allow agent to determine an action\n", " agent.sensoryState = nextState\n", " agent.plasticUpdate()\n", " # determine effect on environment state & any reward (in standard openAI-gym API format)\n", " nextState, reward, goal_achieved, _ = env.step(agent.action)\n", " agent.reward = reward\n", " if env.puzzlesLeftToComplete == 0 or time == HARD_TIME_LIMIT:\n", " agent.plasticUpdate()\n", " break\n", " # could have deadly rewards that stop the trial early\n", " #elif reward <= -10:\n", " # agent.sensoryState = nextState\n", " # agent.reward = reward\n", " # agent.plasticUpdate()\n", " # agent.reset()\n", " # nextState = env.reset()\n", " rewards.append(reward)\n", " time_to_solve_each_trial.append(time)\n", " \n", " \n", "print()\n", "print(list(agent.weights.round(3)))\n", "#print(agent.timeSinceBigSurprise)\n", "plt.figure(figsize=(16,4),dpi=200)\n", "plt.plot(time_to_solve_each_trial)\n", "pt=15 # font point\n", "plt.title('Time until agent solved trial (puzzle boxes)', fontsize=pt)\n", "plt.xlabel('Trial', fontsize=pt)\n", "plt.ylabel('Time', fontsize=pt)\n", "#figure()\n", "#plot(rewards)\n", "env.render(agent);" ] }, { "cell_type": "code", "execution_count": null, "id": "0e22a5e6-47fb-45c0-905f-3fb5b6cc3980", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", 
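{ "cell_type": "markdown", "id": "reward-plot-note", "metadata": {}, "source": [
 "The cell above only plots the time the agent needed per trial; the per-trial rewards are collected in `rewards` but their plot is left commented out (`#plot(rewards)`). The cell below is a minimal follow-up sketch, assuming the training cell above has already been run so that `rewards`, `time_to_solve_each_trial`, `pt`, `np`, and `plt` all exist: it plots the final reward of each trial and a moving average of the solve times (the `window` size is an arbitrary choice)."
] },
{ "cell_type": "code", "execution_count": null, "id": "reward-plot-sketch", "metadata": {}, "outputs": [], "source": [
 "# Follow-up sketch: assumes the training cell above has been run, so that\n",
 "# rewards, time_to_solve_each_trial, pt, np and plt are all defined.\n",
 "window = 10  # moving-average window (arbitrary choice)\n",
 "smoothed = np.convolve(time_to_solve_each_trial, np.ones(window) / window, mode='valid')\n",
 "\n",
 "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16,4), dpi=200)\n",
 "ax1.plot(rewards)\n",
 "ax1.set_title('Final reward per trial', fontsize=pt)\n",
 "ax1.set_xlabel('Trial', fontsize=pt)\n",
 "ax1.set_ylabel('Reward', fontsize=pt)\n",
 "ax2.plot(smoothed)\n",
 "ax2.set_title('Time to solve (moving average, window=%d)' % window, fontsize=pt)\n",
 "ax2.set_xlabel('Trial', fontsize=pt)\n",
 "ax2.set_ylabel('Time', fontsize=pt)\n",
 "plt.show()" ] },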
"name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0" } }, "nbformat": 4, "nbformat_minor": 5 }