summaryrefslogtreecommitdiff
path: root/code/trainer/reinforce_synapse.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'code/trainer/reinforce_synapse.cpp')
-rw-r--r--code/trainer/reinforce_synapse.cpp302
1 files changed, 302 insertions, 0 deletions
diff --git a/code/trainer/reinforce_synapse.cpp b/code/trainer/reinforce_synapse.cpp
new file mode 100644
index 0000000..bf6fc7f
--- /dev/null
+++ b/code/trainer/reinforce_synapse.cpp
@@ -0,0 +1,302 @@
+#include <stdlib.h>
+#include "fileutils.h"
+#include "math.h"
+
+#include "reinforce_synapse.h"
+#include "fileutils.cpp"
+#include "model_switch.h"
+
+using namespace std;
+
+/**
+ * Program entry point: checks the command line, then creates and runs the trainer.
+ *
+ * Exactly six arguments are expected after the program name; their order is
+ * given by the usage text printed below.
+ *
+ * @param argc number of command line entries including the program name
+ * @param argv command line entries; argv[1..6] are handed to the Trainer
+ * @return -1 if the argument count is wrong, 0 once Trainer::run() has returned
+ */
+int main(int argc, char **argv) {
+    // check cmd line sanity
+    if (argc != 7) {
+        fprintf(stderr, "Wrong argument count\n\n"
+            "Call format:\n"
+            "%s\n\t"
+            "performance out\n\t"
+            "trace cmd out\n\t"
+            "global out\n\t"
+            "global in\n\t"
+            "spike out\n\t"
+            "spike in\n\t"
+            "\n"
+            "Special names allowed:\n\t- (standard input)\n\t0 (/dev/null)\n", argv[0]);
+        return -1;
+    }
+
+    // The Trainer constructor starts the reader/writer threads and hands them
+    // a pointer to the object; run() only cancels these threads and does not
+    // join them. The object is therefore left allocated until the process
+    // exits instead of being destroyed while a thread might still touch it.
+    Trainer *t = new Trainer(argc, argv);
+    t->run();
+    // TODO: finalize
+    return 0;
+}
+
+/**
+ * Thread entry wrappers with the exact signature pthread_create() requires.
+ *
+ * read_spikes()/write_spikes() take a Trainer*, so passing them to
+ * pthread_create() through a function pointer cast would make the thread
+ * library call them through an incompatible pointer type (undefined
+ * behaviour). These wrappers convert the argument and forward the call.
+ */
+static void *read_spikes_entry(void *arg) {
+    return read_spikes(static_cast<Trainer*>(arg));
+}
+
+static void *write_spikes_entry(void *arg) {
+    return write_spikes(static_cast<Trainer*>(arg));
+}
+
+/**
+ * Sets the experiment parameters, opens all streams and starts the helper threads.
+ *
+ * @param argc number of command line entries (not used here; main() has
+ *             already verified that it is 7)
+ * @param argv command line entries; by the usage text in main():
+ *             argv[1] performance out, argv[2] trace cmd out, argv[3] global out,
+ *             argv[4] global in, argv[5] spike out, argv[6] spike in
+ *
+ * Terminates the process if one of the helper threads cannot be created.
+ */
+Trainer::Trainer(int argc, char** argv) {
+    // init vars
+    currentEpoch = 0;
+    dopamin_level = 0.0;
+
+    epochDuration = 0.01; // [s]
+    //epochDuration = 1.0; // [s]
+    entireDuration = 20000.0; // [s]
+    neurons = 2; // number of neurons to send noise to
+    freq = 1.0; // [Hz] per Neuron
+    voltage = 0.1; // [V]
+    da_single_reward = 0.01;
+
+    // the spike count bins are allocated lazily in run()
+    neuronFreq[0] = (map<int, int>*) NULL;
+    neuronFreq[1] = (map<int, int>*) NULL;
+
+    // open all file descriptors in an order complementary to the simulator's
+    // one to avoid deadlocks
+    // NOTE(review): the second fd_magic argument is false for the inputs and
+    // true for the outputs - presumably it selects write mode; confirm in fileutils
+    fd_spike_in = fd_magic(argv[6], false);
+    fd_global_in = fd_magic(argv[4], false);
+    fd_spike_out = fd_magic(argv[5], true);
+    fd_global_out = fd_magic(argv[3], true);
+    fd_performance_out = fd_magic(argv[1], true);
+    fd_trace_out = fd_magic(argv[2], true);
+
+    // init locks
+    pthread_mutex_init(&incomingSpikeLock, NULL);
+
+    // create read and write threads (last step, so that every member they
+    // access is already initialised); without them run() could never make
+    // progress, so a failure here is fatal
+    int err = pthread_create(&thread_read, NULL, &read_spikes_entry, this);
+    if (err != 0) {
+        fprintf(stderr, "ERROR: could not create spike reading thread (error %d)\n", err);
+        exit(EXIT_FAILURE);
+    }
+
+    err = pthread_create(&thread_write, NULL, &write_spikes_entry, this);
+    if (err != 0) {
+        fprintf(stderr, "ERROR: could not create spike writing thread (error %d)\n", err);
+        exit(EXIT_FAILURE);
+    }
+}
+
+/**
+ * Runs the experiment epoch by epoch in lock-step with the simulator.
+ *
+ * Before the loop, the trace command and two global states (for t=0 and
+ * t=1.5 epochs) are sent so the simulator can start. Every loop iteration then
+ *   1. sends the trace command for an upcoming epoch,
+ *   2. blocks until a global state line arrives on fd_global_in,
+ *   3. bins the spikes collected by the reader thread (from the second
+ *      iteration on),
+ *   4. raises the dopamine level if neuron 0 spiked in the older bin and
+ *      neuron 1 in the newer bin, and logs the outcome to fd_performance_out,
+ *   5. sends the global state for t = (currentEpoch + 2.5) epochs.
+ *
+ * The loop ends once entireDuration is covered or on the first failure to
+ * read/parse a global state line. Afterwards the trace stream is closed and
+ * both helper threads are cancelled (they are not joined).
+ */
+void Trainer::run() {
+    // start an epoch
+    // wait for its end
+    // process incoming spikes (binning)
+    // select if a reward takes place
+    // print reward value (TODO: into a separate, externally given file descriptor)
+    // send out the reward signal
+
+    // const-qualified: a string literal must not be bound to a plain char*
+    const char *str_trace = "%f; spikes (0; 1); global; neuron (0; 1); synapse (0; 1)\n";
+
+    // send out the full trace command once (later it will be repeated by sending newline)
+    fprintf(fd_trace_out, str_trace, epochDuration);
+    fflush(fd_trace_out);
+
+    // send the first two global states (at t=0 and t=1.5 [bintime]) to allow the simulation
+    // to be initialized (before the causality of the loop below is met)
+    MS_Global msg;
+    msg_init(msg);
+    msg.dopamin_level = dopamin_level;
+
+    // set the tau-levels like in Izhi's network
+    msg.stdp_tau_minus = 1.5 * msg.stdp_tau_plus;
+    msg.stdp_lambda_plus = msg.stdp_lambda_minus;
+
+    fprintf(fd_global_out, "0.0, ");
+    msg_print(msg, fd_global_out);
+    fprintf(fd_global_out, "\n");
+
+    msg_process(msg, 1.5 * epochDuration);
+    dopamin_level = msg.dopamin_level;
+
+
+    fprintf(fd_global_out, "%f, ", 1.5 * epochDuration);
+    msg_print(msg, fd_global_out);
+    fprintf(fd_global_out, "\n");
+
+    fflush(fd_global_out);
+
+    // loop until the experiment is done
+    for (; currentEpoch * epochDuration < entireDuration; currentEpoch++) {
+        // send a new trace command (do it as early as possible although it is
+        // only executed after the new global is sent out at the bottom of this loop)
+        if ((currentEpoch + 2) * epochDuration < entireDuration) {
+            // repeat the previous trace command
+            fprintf(fd_trace_out, "\n");
+        }else{
+            // near the end: request only the time that is left
+            fprintf(fd_trace_out, str_trace, entireDuration - (currentEpoch + 1) * epochDuration);
+        }
+        fflush(fd_trace_out);
+
+        // wait for the end of the epoch (by reading the global state resulting from it)
+        // str_msg receives a part of str_raw, so the equal buffer size is sufficient
+        char str_raw[128], str_msg[128]; str_raw[0] = 0;
+        double _foo_dbl; // leading time stamp of the line; parsed but not used
+        if (fgets((char*) str_raw, 128, fd_global_in) == NULL) {
+            fprintf(stderr, "ERROR: global status file descriptor from simulator closed unexpectedly\n");
+            break;
+        }
+        if ((sscanf((char*) str_raw, "%lf, %[^\n]\n", &_foo_dbl, (char*) str_msg) != 2)
+            || (!msg_parse(msg, (char*) str_msg))) {
+            fprintf(stderr, "ERROR: reading global status from simulator failed\n\t\"%s\"\n", (char*) str_raw);
+            break;
+        }
+
+        // process incoming spikes (binning) of the previous epoch
+        if (currentEpoch > 0) {
+            // shift the bins: [1] becomes the older bin [0], a fresh [1] is created
+            if (neuronFreq[0]) {
+                delete neuronFreq[0];
+                neuronFreq[0] = neuronFreq[1];
+            }else{
+                // first shift: there is no older bin yet, start with an empty one
+                neuronFreq[0] = new map<int, int>();
+            }
+            neuronFreq[1] = new map<int, int>();
+
+            // read all spikes in the correct time window
+            // (the queue is shared with the reader thread, hence the lock)
+            pthread_mutex_lock(&incomingSpikeLock);
+            while ((!incomingSpikes.empty()) && (incomingSpikes.front().get<0>() <= currentEpoch * epochDuration)) {
+                // drop event out of queue
+                SpikeEvent se = incomingSpikes.front();
+                double time = se.get<0>();
+                int neuron = se.get<1>();
+                incomingSpikes.pop();
+
+                // check if it belongs to the previous bin (and ignore it if this is the case)
+                if (time < (currentEpoch - 1) * epochDuration) {
+                    fprintf(stderr, "WARN: spike reading thread too slow; unprocessed spike of the past discovered\n%f\t%f\t%d\t%f\n", time, (double) (currentEpoch - 1) * epochDuration, currentEpoch, epochDuration);
+                    continue;
+                }
+
+                // increment the frequency counter (relies on int being default constructible to value 0)
+                (*neuronFreq[1])[neuron]++;
+            }
+
+            pthread_mutex_unlock(&incomingSpikeLock);
+        }
+
+        // advance the global state to keep it in sync with the simulator's global state
+        // the local dopamine level is kept separately and aged only one epochDuration to
+        // avoid oscillation effects in dopamine level
+        msg_process(msg, 1.5 * epochDuration);
+        dopamin_level *= exp( - epochDuration / msg.dopamin_tau );
+
+        // select if the reward takes place: neuron 0 has to have spiked in the
+        // older bin and neuron 1 in the newer one (both bins exist once currentEpoch > 1)
+        if ((currentEpoch > 1) && ((*neuronFreq[0])[0] > 0) && ((*neuronFreq[1])[1] > 0)) {
+            dopamin_level += da_single_reward;
+            fprintf(fd_performance_out, "+");
+        }else{
+            fprintf(fd_performance_out, "-");
+        }
+
+        if (currentEpoch > 1) {
+            //fprintf(fd_performance_out, "\n");
+            fprintf(fd_performance_out, "\t%f\t%d\t%d\n", dopamin_level, (*neuronFreq[0])[0], (*neuronFreq[1])[1]);
+        }else{
+            // fake output as actual data is not available, yet
+            fprintf(fd_performance_out, "\t%f\t%d\t%d\n", dopamin_level, (int) 0, (int) 0);
+        }
+
+        // set the new DA level
+        msg.dopamin_level = dopamin_level;
+
+        // print new global state
+        // (do this even if there has been no evaluation of the performance yet,
+        // because it is necessary for the simulator to proceed)
+
+        fprintf(fd_global_out, "%f, ", ((double) currentEpoch + 2.5) * epochDuration);
+        msg_print(msg, fd_global_out);
+        fprintf(fd_global_out, "\n");
+        fflush(fd_global_out);
+    }
+
+    fclose(fd_trace_out);
+
+    // terminate child threads
+    pthread_cancel(thread_read);
+    pthread_cancel(thread_write);
+}
+
+/**
+ * Reader thread body: parses spike lines from t->fd_spike_in and queues them.
+ *
+ * Every line has to match "<time>, <neuron>, <current>". Spikes whose time
+ * stamp is smaller than that of the last accepted spike are reported and
+ * dropped; all others are appended to t->incomingSpikes under
+ * t->incomingSpikeLock.
+ *
+ * @param t trainer owning the spike stream, the queue and its lock
+ * @return always NULL; the function returns on a malformed line, on EOF or
+ *         on a read error of the spike stream
+ */
+void *read_spikes(Trainer *t) {
+    double lastSpike = -INFINITY; // used to check if the spikes are coming in order
+    char buf[128];
+
+    // read one line at a time (blocking); fgets() yields NULL on EOF as well
+    // as on a read error, so neither of them can keep the loop spinning
+    while (fgets(buf, sizeof(buf), t->fd_spike_in) != NULL) {
+        // parse the input
+        double time, current;
+        int neuron;
+        if (sscanf(buf, "%lf, %d, %lf\n", &time, &neuron, &current) != 3) {
+            // format is wrong, stop
+            fprintf(stderr, "ERROR: malformatted incoming spike:\n\t%s\n", buf);
+            return NULL;
+        }
+
+        if (lastSpike > time) {
+            fprintf(stderr, "WARN: out of order spike detected (coming from simulator)\n\t%f\t%d\n", time, neuron);
+            continue;
+        }
+
+        lastSpike = time;
+
+        // add the spike to the queue of spikes
+        pthread_mutex_lock(&(t->incomingSpikeLock));
+        t->incomingSpikes.push(boost::make_tuple(time, neuron, current));
+        pthread_mutex_unlock(&(t->incomingSpikeLock));
+    }
+
+    // we shouldn't reach this point in a non-error case
+    if (feof(t->fd_spike_in)) {
+        fprintf(stderr, "ERROR: EOF in incoming spike stream\n");
+    }else{
+        fprintf(stderr, "ERROR: reading incoming spike stream failed\n");
+    }
+    // TODO: kill entire program
+    return NULL;
+}
+
+/**
+ * Writer thread body: sends a noise spike train to the simulator.
+ *
+ * Emits lines of the form "<time>, <neuron>, <current>" on t->fd_spike_out
+ * with exponentially distributed inter-spike intervals (mean
+ * 1 / (t->freq * t->neurons)), a destination neuron drawn from
+ * [0, t->neurons) and the fixed current t->voltage. The loop stops after the
+ * first spike whose time exceeds t->entireDuration; then the stream is closed.
+ *
+ * @param t trainer providing the output stream and the noise parameters
+ * @return always NULL
+ */
+void *write_spikes(Trainer *t) {
+    // at the moment: generate noise until the file descriptor blocks
+    double time = 0.0;
+
+    // PAR HINT:
+    // loop until exactly one spike after the entire duration is sent out
+    // this will block on full buffer on the file descriptor and thus keep
+    // the thread busy early enough
+
+
+    /* // ---- send 100% dependent spike train ---
+    time = 0.005;
+    while (time <= t->entireDuration) {
+        fprintf(t->fd_spike_out, "%f, %d, %f\n", time, 0, 1.0);
+        time += 0.012;
+        fprintf(t->fd_spike_out, "%f, %d, %f\n", time, 1, 1.0);
+        time += 1.0;
+    }*/
+
+
+    // ---- send independent poisson noise ----
+    while (time <= t->entireDuration) {
+        // calc timing, intensity and destination of the spike
+        // HINT:
+        // * log(...) is negative
+        // * drand48() returns something in [0,1), to avoid log(0) we transform it to (0,1]
+        time -= log(1.0 - drand48()) / (t->freq * t->neurons);
+        int dst = rand() % t->neurons;
+        double current = t->voltage;
+
+        // send it to the simulator
+        fprintf(t->fd_spike_out, "%f, %d, %f\n", time, dst, current);
+    }
+
+    /*// ---- send independent poisson noise w/ increasing frequency ----
+    double blafoo = 0;
+    t->freq = 1.0;
+    while (time <= t->entireDuration) {
+        if (time - blafoo > 100.0) {
+            blafoo += 200.0;
+            t->freq += 1.0;
+            time += 100.0; // time jump to let ET recover to zero
+        }
+        // calc timing, intensity and destination of the spike
+        // HINT:
+        // * log(...) is negative
+        // * drand48() returns something in [0,1), to avoid log(0) we transform it to (0,1]
+        time -= log(1.0 - drand48()) / (t->freq * t->neurons);
+        int dst = rand() % t->neurons;
+        double current = t->voltage;
+
+        // send it to the simulator
+        fprintf(t->fd_spike_out, "%f, %d, %f\n", time, dst, current);
+    }*/
+
+    // close fd because fscanf sucks
+    fclose(t->fd_spike_out);
+
+    // the function is declared to return void*; leaving it without a return
+    // statement would be undefined behaviour
+    return NULL;
+}
contact: Jan Huwald // Impressum