summaryrefslogtreecommitdiff
path: root/code/core/reward.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'code/core/reward.cpp')
-rw-r--r--code/core/reward.cpp105
1 files changed, 105 insertions, 0 deletions
diff --git a/code/core/reward.cpp b/code/core/reward.cpp
new file mode 100644
index 0000000..0738c19
--- /dev/null
+++ b/code/core/reward.cpp
@@ -0,0 +1,105 @@
+#include <vector>
+
+#include "simulate.h"
+#include "bin.h"
+
+#include "reward.h"
+
+// Bootstraps the dopamine-reward trainer. Executed once; afterwards the more
+// specific Reward_* events keep rescheduling each other.
+void Reward::vexecute() {
+	// Every symbol gets its own run of 50 consecutive neuron ids so the
+	// populations are easy to tell apart when viewing: input ids count up
+	// from 0, output ids count down from 799.
+	const int neuronsPerSymbol = 50;
+	const int outputIdTop = 799;
+
+	inputNeurons = new IOPop(g.trainer_numSymbols);
+	outputNeurons = new IOPop(g.trainer_numSymbols);
+
+	for (int sym = 0; sym < g.trainer_numSymbols; ++sym) {
+		const int firstId = sym * neuronsPerSymbol;
+		const int endId = firstId + neuronsPerSymbol; // exclusive upper bound
+		for (int id = firstId; id < endId; ++id) {
+			(*inputNeurons)[sym].insert(id);
+			(*outputNeurons)[sym].insert(outputIdTop - id);
+		}
+	}
+
+	// Schedule the first symbol presentation 10 time units after this event.
+	// NOTE(review): the earlier comment said "0.5 second" - confirm the unit of `time`.
+	s.addEvent(new Reward_Input(time + 10, inputNeurons, outputNeurons));
+}
+
+// Picks a random symbol, stimulates that symbol's input population and
+// schedules the event that starts binning the output populations.
+void Reward_Input::vexecute() {
+	int symbol = rand() % g.trainer_numSymbols;
+	fprintf(stderr, "Pin: %d \n", symbol);
+
+	// Send one external spike with value 0.15 to every input neuron of the
+	// chosen symbol; each spike is delayed by its own random offset in
+	// [0, 0.001) time units.
+	// NOTE(review): the earlier comment said "0.1 V" while the code passes 0.15 - confirm.
+	std::set<int> &targets = (*inputNeurons)[symbol];
+	std::set<int>::iterator neuron = targets.begin();
+	while (neuron != targets.end()) {
+		const double jitter = drand48() * 0.001;
+		s.addEvent(new ExternalSpike(time + jitter, *neuron, 0.15));
+		++neuron;
+	}
+
+	// Binning starts after the delay g.trainer_eval_delay.
+	s.addEvent(new Reward_EnableBinning(time + g.trainer_eval_delay, inputNeurons, outputNeurons, symbol));
+}
+
+// Starts binning every output population and schedules the readout event
+// that will evaluate (and later free) the bins created here.
+void Reward_EnableBinning::vexecute() {
+	// One bin per output population. The bin list is sized from the population
+	// container itself (not from g.trainer_numSymbols), so the indices written
+	// below are always in range and no slot is left unset.
+	const vector<Bin*>::size_type numPopulations = outputNeurons->size();
+	vector<Bin*> *bins = new vector<Bin*>(numPopulations);
+	for (vector<Bin*>::size_type i = 0; i < numPopulations; ++i) {
+		Bin *b = new Bin(&((*outputNeurons)[i]));
+		s.to.traceBinSets.insert(b); // register the bin with the trace set
+		(*bins)[i] = b;
+	}
+
+	// Delay until the readout: a constant part, a part proportional to the
+	// current time, and two randomly scaled parts (one constant, one
+	// proportional to the current time).
+	double delay = g.trainer_rd_c1
+		+ g.trainer_rd_c2 * time
+		+ g.trainer_rd_c3 * drand48()
+		+ g.trainer_rd_c4 * time * drand48();
+	// Ownership of `bins` passes to the readout event, which deletes the bins.
+	s.addEvent(new Reward_Readout(time + delay, symbol, inputNeurons, outputNeurons, bins));
+}
+
+// Evaluates the binned output activity, deploys a positive or negative reward,
+// tears the bins down again and schedules the next input presentation.
+void Reward_Readout::vexecute() {
+	// Reward only when the performance estimate exceeds 1.0; otherwise apply
+	// a penalty of the same magnitude.
+	const bool success = estimatePerformance() > 1.0;
+	if (success)
+		deployReward(g.trainer_rewardAmount);
+	else
+		deployReward(-g.trainer_rewardAmount);
+
+	// Unregister each bin from the trace set before freeing it, then free
+	// the bin list itself.
+	for (vector<Bin*>::iterator it = bins->begin(); it != bins->end(); ++it) {
+		Bin *bin = *it;
+		s.to.traceBinSets.erase(bin);
+		delete bin;
+	}
+	delete bins;
+
+	// The next input is presented after g.trainer_refractoryTime has passed.
+	s.addEvent(new Reward_Input(time + g.trainer_refractoryTime, inputNeurons, outputNeurons));
+}
+
+// Compares the spike count of the target population (bin index == symbol)
+// with the highest count among all other populations.
+//
+// Returns target_count / max_other_count. If no other population fired, the
+// result is 0.0 when the target was silent too, and infinite otherwise.
+// Every bin count and the result are logged to stderr.
+double Reward_Readout::estimatePerformance() {
+	int max_freq = 0;    // max. count of all populations _except_ the target population
+	int target_freq = 0; // stays 0 if no bin index matches `symbol` (previously read uninitialized)
+
+	const int numBins = static_cast<int>(bins->size());
+	for (int i = 0; i < numBins; i++) {
+		fprintf(stderr, "Pout: %d -> %d \n", i, (*bins)[i]->count);
+		if (i == symbol) {
+			target_freq = (*bins)[i]->count;
+		} else {
+			max_freq = max(max_freq, (*bins)[i]->count);
+		}
+	}
+
+	double res;
+	if (max_freq != 0) {
+		res = static_cast<double>(target_freq) / max_freq;
+	} else if (target_freq == 0) {
+		// nobody fired at all: avoid 0/0
+		res = 0.0;
+	} else {
+		// only the target fired: the ratio is unbounded
+		res = static_cast<double>(target_freq) * INFINITY;
+	}
+
+	fprintf(stderr, "PERF: %f\n", res);
+	return res;
+}
+
+// Adds `reward` (which may be negative) to the global dopamine level and
+// records the resulting level, together with the current time, at the front
+// of the dopamine history.
+void Reward_Readout::deployReward(double reward) {
+	g.dopamin_level += reward;
+
+	const pair<double,double> sample(time, g.dopamin_level);
+	s.da_history.push_front(sample);
+}
contact: Jan Huwald // Impressum