#include <algorithm> // std::max
#include <cmath>     // INFINITY
#include <cstdio>    // fprintf
#include <cstdlib>   // rand, drand48
#include <set>
#include <utility>   // std::pair, std::make_pair
#include <vector>
#include "simulate.h"
#include "bin.h"
#include "reward.h"
// Called only once, to initialize the dopamine reward system; afterwards the
// more specific events below keep the training loop running.
void Reward::vexecute() {
    // For ease of viewing, assign 50 consecutive (ascending) neurons per symbol.
    inputNeurons = new IOPop(g.trainer_numSymbols);
    outputNeurons = new IOPop(g.trainer_numSymbols);
    for (int i = 0; i < g.trainer_numSymbols; i++)
        for (int j = i * 50; j < (i + 1) * 50; j++) {
            // Symbol i is driven via input neurons [50*i, 50*(i+1)) and read
            // out from output neurons counting down from 799 (the network
            // apparently has at least 800 neurons, indexed from 0).
            (*inputNeurons)[i].insert(j);
            (*outputNeurons)[i].insert(799 - j);
        }
    // Present the first symbol 10 time units after initialization.
    s.addEvent(new Reward_Input(time + 10, inputNeurons, outputNeurons));
}
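
/*
 * Event chain implemented in this file (the refractory edge closes the loop):
 *
 *   Reward --> Reward_Input --> Reward_EnableBinning --> Reward_Readout
 *                  ^                                           |
 *                  +----------- refractory delay --------------+
 */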
// Choose and present a random input symbol, then wait until evaluation.
void Reward_Input::vexecute() {
    int symbol = rand() % g.trainer_numSymbols;
    fprintf(stderr, "Pin: %d \n", symbol);
    // Excite the symbol-specific input neurons with 0.15 V, jittered uniformly
    // over a 0.001 time-unit window.
    for (std::set<int>::iterator i = (*inputNeurons)[symbol].begin(); i != (*inputNeurons)[symbol].end(); i++)
        s.addEvent(new ExternalSpike(time + drand48() * 0.001, *i, 0.15));
    // Wait a fixed (not yet randomized) time until evaluation.
    s.addEvent(new Reward_EnableBinning(time + g.trainer_eval_delay, inputNeurons, outputNeurons, symbol));
}
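
// Note: each presentation schedules one ExternalSpike per input neuron of the
// chosen symbol (50 spikes with the mapping above), spread across the jitter
// window rather than delivered at once; presumably this avoids fully
// synchronous input.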
// Start binning the spikes of the relevant output neurons.
void Reward_EnableBinning::vexecute() {
    // Add one bin per symbol population.
    std::vector<Bin*> *bins = new std::vector<Bin*>(g.trainer_numSymbols);
    for (int i = 0; i < (int) outputNeurons->size(); i++) {
        Bin *b = new Bin(&((*outputNeurons)[i]));
        s.to.traceBinSets.insert(b);
        (*bins)[i] = b;
    }
    // Wait a small amount of time before reading the result of this binning.
    double delay = g.trainer_rd_c1
                 + g.trainer_rd_c2 * time
                 + g.trainer_rd_c3 * drand48()
                 + g.trainer_rd_c4 * time * drand48();
    s.addEvent(new Reward_Readout(time + delay, symbol, inputNeurons, outputNeurons, bins));
}
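
// Readout delay as a function of simulation time t and two independent
// uniform draws u1, u2 ~ U(0,1):
//   delay(t) = c1 + c2*t + c3*u1 + c4*t*u2
// With c2 = c4 = 0 the delay is independent of simulation time; with
// c3 = c4 = 0 it is fully deterministic.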
// Read the output frequencies and deploy a reward.
void Reward_Readout::vexecute() {
    if (estimatePerformance() > 1.0) {
        // The target population fired more than every competitor: reward.
        deployReward(g.trainer_rewardAmount);
    } else {
        // Otherwise punish by the same amount.
        deployReward(-g.trainer_rewardAmount);
    }
    // Unregister the bins from tracing before deleting them.
    for (int i = 0; i < (int) bins->size(); i++) {
        s.to.traceBinSets.erase((*bins)[i]);
        delete (*bins)[i];
    }
    delete bins;
    // Wait a refractory time, then present the next symbol.
    s.addEvent(new Reward_Input(time + g.trainer_refractoryTime, inputNeurons, outputNeurons));
}
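
// Because the readout always schedules the next Reward_Input, the
// present/evaluate/reward cycle repeats indefinitely once Reward::vexecute
// has run.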
double Reward_Readout::estimatePerformance() {
    int max_freq = 0;    // max. freq. of all populations _except_ the target population
    int target_freq = 0; // initialized in case the target bin is never visited
    for (int i = 0; i < (int) bins->size(); i++) {
        fprintf(stderr, "Pout: %d -> %d \n", i, (*bins)[i]->count);
        if (i == symbol) {
            target_freq = (*bins)[i]->count;
        } else {
            max_freq = std::max(max_freq, (*bins)[i]->count);
        }
    }
    double res;
    if (max_freq == 0) {
        if (target_freq == 0) {
            // Nothing fired at all; 0.0 <= 1.0, so this counts as a failure.
            res = 0.0;
        } else {
            // Any response against a silent background is infinitely better.
            res = INFINITY;
        }
    } else {
        res = ((double) target_freq) / max_freq;
    }
    fprintf(stderr, "PERF: %f\n", res);
    return res;
}
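
// Worked example: with bin counts {8, 12, 5} and symbol = 1, target_freq = 12
// and max_freq = max(8, 5) = 8, so the performance is 12/8 = 1.5 > 1.0 and a
// positive reward is deployed.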
void Reward_Readout::deployReward(double reward) {
    // Adjust the global dopamine level (negative rewards lower it) and log it.
    g.dopamin_level += reward;
    s.da_history.push_front(std::make_pair(time, g.dopamin_level));
}
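
/*
 * Minimal usage sketch (hypothetical: it assumes simulate.h exposes the global
 * simulation object `s` with addEvent()/run(), and that Reward has an
 * event-style constructor taking a start time; neither is shown in this file):
 *
 *   int main() {
 *       srand48(42);                 // seed drand48() for reproducible jitter
 *       s.addEvent(new Reward(0.0)); // kicks off the whole training loop
 *       s.run();
 *       return 0;
 *   }
 */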