diff options
Diffstat (limited to 'code/core/reward.cpp')
-rw-r--r-- | code/core/reward.cpp | 105 |
1 files changed, 105 insertions, 0 deletions
diff --git a/code/core/reward.cpp b/code/core/reward.cpp new file mode 100644 index 0000000..0738c19 --- /dev/null +++ b/code/core/reward.cpp @@ -0,0 +1,105 @@ +#include <vector> + +#include "simulate.h" +#include "bin.h" + +#include "reward.h" + +// only called once to init the da-reward system; later the more specific events are called +void Reward::vexecute() { + // for ease of viewing ascendingly select 50 neurons per symbol + inputNeurons = new IOPop(g.trainer_numSymbols); + outputNeurons = new IOPop(g.trainer_numSymbols); + for (int i = 0; i<g.trainer_numSymbols; i++) + for (int j=i*50; j<(i+1)*50; j++) { + (*inputNeurons)[i].insert(j); + (*outputNeurons)[i].insert(799 - j); + } + + + // present the first symbol after 0.5 second + s.addEvent(new Reward_Input(time + 10, inputNeurons, outputNeurons)); +} + +// choose and present a random input, then wait a random time +void Reward_Input::vexecute() { + int symbol = rand() % g.trainer_numSymbols; + fprintf(stderr, "Pin: %d \n", symbol); + + // excite symbol specific input neurons with 0.1 V + for (std::set<int>::iterator i = (*inputNeurons)[symbol].begin(); i != (*inputNeurons)[symbol].end(); i++) + s.addEvent(new ExternalSpike(time + drand48() * 0.001, *i, 0.15)); + + // wait a (not yet) random time until evaluation + s.addEvent(new Reward_EnableBinning(time + g.trainer_eval_delay, inputNeurons, outputNeurons, symbol)); +} + +// start the binning of relevant neurons +void Reward_EnableBinning::vexecute() { + // add bins + vector<Bin*> *bins = new vector<Bin*>(g.trainer_numSymbols); + for (int i=0; i < (*outputNeurons).size(); i++) { + Bin *b = new Bin(&((*outputNeurons)[i])); + s.to.traceBinSets.insert(b); + (*bins)[i] = b; + } + + // wait a small amount until reading the result of this binning + double delay = g.trainer_rd_c1 + + g.trainer_rd_c2 * time + + g.trainer_rd_c3 * drand48() + + g.trainer_rd_c4 * time * drand48(); + s.addEvent(new Reward_Readout(time + delay, symbol, inputNeurons, outputNeurons, bins)); +} + +// read the output frequencies and give reward +void Reward_Readout::vexecute() { + if (estimatePerformance() > 1.0) { + deployReward(g.trainer_rewardAmount); + }else{ + deployReward(-g.trainer_rewardAmount); + } + + // delete bin trace commands + for (int i=0; i<bins->size(); i++) { + s.to.traceBinSets.erase((*bins)[i]); + delete (*bins)[i]; + } + delete bins; + + // wait a refractory time + s.addEvent(new Reward_Input(time + g.trainer_refractoryTime, inputNeurons, outputNeurons)); +} + +double Reward_Readout::estimatePerformance() { + int max_freq = 0, // max. freq of all populations _except_ target population + target_freq; + + for (int i=0; i<bins->size(); i++) { + fprintf(stderr, "Pout: %d -> %d \n", i, (*bins)[i]->count); + if (i == symbol) { + target_freq = (*bins)[i]->count; + }else{ + max_freq = max(max_freq, (*bins)[i]->count); + } + } + + double res; + if (max_freq == 0) { + if (target_freq == 0) { + res = 0.0; + }else{ + res = ((double) target_freq) * INFINITY; + } + }else{ + res = ((double) target_freq) / max_freq; + } + + fprintf(stderr, "PERF: %f\n", res); + return res; +} + +void Reward_Readout::deployReward(double reward) { + g.dopamin_level += reward; + s.da_history.push_front(pair<double,double>(time, g.dopamin_level)); +} |