code/core/reward.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105

#include <vector>

#include "simulate.h"
#include "bin.h"

#include "reward.h"

// Bootstrap event of the reward trainer.
//
// Allocates the input and output populations (one neuron set per symbol,
// g.trainer_numSymbols sets each) and fills them so that symbol k owns the
// input neurons [50k, 50k + 50) and the mirrored output neurons 799 - n for
// every n in that range. It then schedules the first Reward_Input event
// 10 time units after this event's own `time`; the follow-up events
// re-schedule themselves from there on.
void Reward::vexecute() {
  inputNeurons  = new IOPop(g.trainer_numSymbols);
  outputNeurons = new IOPop(g.trainer_numSymbols);

  // consecutive blocks of 50 neuron ids per symbol keep the plots readable
  for (int symbol = 0; symbol < g.trainer_numSymbols; ++symbol) {
    const int firstNeuron = symbol * 50;
    const int endNeuron   = firstNeuron + 50;

    for (int neuron = firstNeuron; neuron < endNeuron; ++neuron) {
      (*inputNeurons)[symbol].insert(neuron);
      (*outputNeurons)[symbol].insert(799 - neuron);
    }
  }

  // kick off the trial loop with the first stimulus
  s.addEvent(new Reward_Input(time + 10, inputNeurons, outputNeurons));
}

// Present one randomly chosen symbol to the network.
//
// Draws a symbol index with rand(), logs it to stderr, and queues one
// ExternalSpike of strength 0.15 for every input neuron of that symbol, each
// jittered by drand48() * 0.001 after the current `time`. Finally schedules
// Reward_EnableBinning g.trainer_eval_delay later, carrying the chosen symbol
// along.
void Reward_Input::vexecute() {
  const int symbol = rand() % g.trainer_numSymbols;
  fprintf(stderr, "Pin: %d                              \n", symbol);

  // stimulate every input neuron that belongs to the chosen symbol
  std::set<int>::iterator neuron = (*inputNeurons)[symbol].begin();
  const std::set<int>::iterator stop = (*inputNeurons)[symbol].end();
  while (neuron != stop) {
    s.addEvent(new ExternalSpike(time + drand48() * 0.001, *neuron, 0.15));
    ++neuron;
  }

  // fixed (not yet randomised) pause before the output is evaluated
  s.addEvent(new Reward_EnableBinning(time + g.trainer_eval_delay,
                                      inputNeurons, outputNeurons, symbol));
}

// start the binning of relevant neurons
void Reward_EnableBinning::vexecute() {
  // add bins
  vector<Bin*> *bins = new vector<Bin*>(g.trainer_numSymbols);
  for (int i=0; i < (*outputNeurons).size(); i++) {
    Bin *b = new Bin(&((*outputNeurons)[i]));
    s.to.traceBinSets.insert(b);
    (*bins)[i] = b;
  }

  // wait a small amount until reading the result of this binning
  double delay = g.trainer_rd_c1
               + g.trainer_rd_c2 * time
               + g.trainer_rd_c3 * drand48()
               + g.trainer_rd_c4 * time * drand48();
  s.addEvent(new Reward_Readout(time + delay, symbol, inputNeurons, outputNeurons, bins));
}

// read the output frequencies and give reward
void Reward_Readout::vexecute() {
  if (estimatePerformance() > 1.0) {
    deployReward(g.trainer_rewardAmount);
  }else{
    deployReward(-g.trainer_rewardAmount);
  }

  // delete bin trace commands
  for (int i=0; i<bins->size(); i++) {
    s.to.traceBinSets.erase((*bins)[i]);
    delete (*bins)[i];
  }
  delete bins;

  // wait a refractory time
  s.addEvent(new Reward_Input(time + g.trainer_refractoryTime, inputNeurons, outputNeurons));
}

// Compare the spike count of the target population with the strongest
// competing population.
//
// Logs every bin count and the resulting score to stderr.
//
// Returns target_count / max_other_count when at least one other population
// fired. When no other population fired, returns 0.0 if the target was silent
// as well, and target_count * INFINITY otherwise.
double Reward_Readout::estimatePerformance() {
  int max_freq = 0;    // max. freq of all populations _except_ target population
  // Initialised so the value is defined even when `symbol` matches no bin
  // index (e.g. an empty bin list); it was previously read uninitialised then.
  int target_freq = 0;

  // kept as int so the "%d" in the log line below stays correct for the index
  const int numBins = static_cast<int>(bins->size());
  for (int i = 0; i < numBins; i++) {
    fprintf(stderr, "Pout: %d -> %d              \n", i, (*bins)[i]->count);
    if (i == symbol) {
      target_freq = (*bins)[i]->count;
    }else{
      max_freq = max(max_freq, (*bins)[i]->count);
    }
  }

  double res;
  if (max_freq == 0) {
    // no competitor fired: avoid dividing by zero
    if (target_freq == 0) {
      res = 0.0;
    }else{
      res = ((double) target_freq) * INFINITY;
    }
  }else{
    res = ((double) target_freq) / max_freq;
  }

  fprintf(stderr, "PERF: %f\n", res);
  return res;
}

void Reward_Readout::deployReward(double reward) {
  g.dopamin_level += reward;
  s.da_history.push_front(pair<double,double>(time, g.dopamin_level));
}