#include #include "simulate.h" #include "bin.h" #include "reward.h" // only called once to init the da-reward system; later the more specific events are called void Reward::vexecute() { // for ease of viewing ascendingly select 50 neurons per symbol inputNeurons = new IOPop(g.trainer_numSymbols); outputNeurons = new IOPop(g.trainer_numSymbols); for (int i = 0; i::iterator i = (*inputNeurons)[symbol].begin(); i != (*inputNeurons)[symbol].end(); i++) s.addEvent(new ExternalSpike(time + drand48() * 0.001, *i, 0.15)); // wait a (not yet) random time until evaluation s.addEvent(new Reward_EnableBinning(time + g.trainer_eval_delay, inputNeurons, outputNeurons, symbol)); } // start the binning of relevant neurons void Reward_EnableBinning::vexecute() { // add bins vector *bins = new vector(g.trainer_numSymbols); for (int i=0; i < (*outputNeurons).size(); i++) { Bin *b = new Bin(&((*outputNeurons)[i])); s.to.traceBinSets.insert(b); (*bins)[i] = b; } // wait a small amount until reading the result of this binning double delay = g.trainer_rd_c1 + g.trainer_rd_c2 * time + g.trainer_rd_c3 * drand48() + g.trainer_rd_c4 * time * drand48(); s.addEvent(new Reward_Readout(time + delay, symbol, inputNeurons, outputNeurons, bins)); } // read the output frequencies and give reward void Reward_Readout::vexecute() { if (estimatePerformance() > 1.0) { deployReward(g.trainer_rewardAmount); }else{ deployReward(-g.trainer_rewardAmount); } // delete bin trace commands for (int i=0; isize(); i++) { s.to.traceBinSets.erase((*bins)[i]); delete (*bins)[i]; } delete bins; // wait a refractory time s.addEvent(new Reward_Input(time + g.trainer_refractoryTime, inputNeurons, outputNeurons)); } double Reward_Readout::estimatePerformance() { int max_freq = 0, // max. freq of all populations _except_ target population target_freq; for (int i=0; isize(); i++) { fprintf(stderr, "Pout: %d -> %d \n", i, (*bins)[i]->count); if (i == symbol) { target_freq = (*bins)[i]->count; }else{ max_freq = max(max_freq, (*bins)[i]->count); } } double res; if (max_freq == 0) { if (target_freq == 0) { res = 0.0; }else{ res = ((double) target_freq) * INFINITY; } }else{ res = ((double) target_freq) / max_freq; } fprintf(stderr, "PERF: %f\n", res); return res; } void Reward_Readout::deployReward(double reward) { g.dopamin_level += reward; s.da_history.push_front(pair(time, g.dopamin_level)); }