1 files changed, 302 insertions, 0 deletions
diff --git a/code/trainer/reinforce_synapse.cpp b/code/trainer/reinforce_synapse.cpp
new file mode 100644
index 0000000..bf6fc7f
--- /dev/null
+++ b/code/trainer/reinforce_synapse.cpp
@@ -0,0 +1,302 @@
+#include <stdlib.h>
+#include "fileutils.h"
+#include "math.h"
+
+#include "reinforce_synapse.h"
+#include "fileutils.cpp"
+#include "model_switch.h"
+
+using namespace std;
+
+int main(int argc, char **argv) {
+  // Usage text; the %s placeholder receives the program name (argv[0]).
+  static const char usage[] =
+    "Wrong argument count\n\nCall format:\n%s\n\t"
+    "performance out\n\t"
+    "trace cmd out\n\t"
+    "global out\n\t"
+    "global in\n\t"
+    "spike out\n\t"
+    "spike in\n\t\n"
+    "Special names allowed:\n\t- (standart input)\n\t0 (/dev/null)\n";
+  // The program name plus exactly six stream names must be given.
+  if (argc != 7) {
+    fprintf(stderr, usage, argv[0]);
+    return -1;
+  }
+
+  Trainer *trainer = new Trainer(argc, argv);
+  trainer->run();
+  return 0; // TODO: finalize
+}
+
+// Initialise the experiment parameters, open the six streams named in argv[1..6]
+// and start the spike reader/writer threads; exits if a thread cannot be created.
+Trainer::Trainer(int argc, char** argv) {
+  currentEpoch  = 0;
+  dopamin_level = 0.0;
+  epochDuration  = 0.01; // [s] (alternative setting: 1.0)
+  entireDuration = 20000.0; // [s]
+  neurons        = 2;    // number of neurons to send noise to
+  freq           = 1.0; // [Hz] per Neuron
+  voltage        = 0.1; // [V]
+  da_single_reward = 0.01;
+  neuronFreq[0] = (map<int, int>*) NULL;
+  neuronFreq[1] = (map<int, int>*) NULL;
+
+  // open all file descriptors in an order complementary to the simulator's one
+  // to avoid deadlocks (do not reorder these calls)
+  fd_spike_in        = fd_magic(argv[6], false);
+  fd_global_in       = fd_magic(argv[4], false);
+  fd_spike_out       = fd_magic(argv[5], true);
+  fd_global_out      = fd_magic(argv[3], true);
+  fd_performance_out = fd_magic(argv[1], true);
+  fd_trace_out       = fd_magic(argv[2], true);
+
+  // init the lock before the threads that use it are started
+  pthread_mutex_init(&incomingSpikeLock, NULL);
+  // create read and write threads; a missing thread would leave a spike stream unserviced
+  if (pthread_create(&thread_read, NULL, (void* (*)(void*)) &read_spikes, this) != 0
+      || pthread_create(&thread_write, NULL, (void* (*)(void*)) &write_spikes, this) != 0) {
+    fprintf(stderr, "ERROR: could not create spike reader/writer thread\n");
+    exit(EXIT_FAILURE);
+  }
+}
+
+// Drive the whole experiment: announce the trace command and two initial global
+// states, then per epoch read the simulator state, bin spikes and emit the reward.
+void Trainer::run() {
+  // per epoch:
+  //   wait for its end (the simulator reports its global state)
+  //   process incoming spikes (binning)
+  //   select if a reward takes place
+  //   print the reward value to the performance stream
+  //   send out the reward signal (dopamine level) with the next global state
+
+  const char *str_trace = "%f; spikes (0; 1); global; neuron (0; 1); synapse (0; 1)\n";
+
+  // send out the full trace command once (later it will be repeated by sending newline)
+  fprintf(fd_trace_out, str_trace, epochDuration);
+  fflush(fd_trace_out);
+
+  // send the first two global states (at t=0 and t=1.5 [bintime]) to allow the simulation to
+  // be initialized (before the causality of the loop below is met)
+  MS_Global msg;
+  msg_init(msg);
+  msg.dopamin_level = dopamin_level;
+
+  // set the tau-levels like in Izhi's network
+  msg.stdp_tau_minus = 1.5 * msg.stdp_tau_plus;
+  msg.stdp_lambda_plus = msg.stdp_lambda_minus;
+
+  fprintf(fd_global_out, "0.0, ");
+  msg_print(msg, fd_global_out);
+  fprintf(fd_global_out, "\n");
+
+  msg_process(msg, 1.5 * epochDuration);
+  dopamin_level = msg.dopamin_level;
+
+  fprintf(fd_global_out, "%f, ", 1.5 * epochDuration);
+  msg_print(msg, fd_global_out);
+  fprintf(fd_global_out, "\n");
+  fflush(fd_global_out);
+
+  // loop until the experiment is done
+  for (; currentEpoch * epochDuration < entireDuration; currentEpoch++) {
+    // send a new trace command (do it as early as possible although it is
+    // only executed after the new global is sent out at the bottom of this loop)
+    if ((currentEpoch + 2) * epochDuration < entireDuration) {
+      // repeat the previous trace command
+      fprintf(fd_trace_out, "\n");
+    }else{
+      fprintf(fd_trace_out, str_trace, entireDuration - (currentEpoch + 1) * epochDuration);
+    }
+    fflush(fd_trace_out);
+
+    // wait for the end of the epoch (by reading the global state resulting from it)
+    char str_raw[128], str_msg[128]; str_raw[0] = 0;
+    double _foo_dbl;
+    if (fgets((char*) str_raw, 128, fd_global_in) == NULL) {
+      fprintf(stderr, "ERROR: global status file descriptor from simulator closed unexpectedly\n");
+      break;
+    }
+    if ((sscanf((char*) str_raw, "%lf, %[^\n]\n", &_foo_dbl, (char*) str_msg) != 2)
+        || (!msg_parse(msg, (char*) str_msg))) {
+      fprintf(stderr, "ERROR: reading global status from simulator failed\n\t\"%s\"\n", (char*) str_raw);
+      break;
+    }
+
+    // process incoming spikes (binning) of the previous epoch
+    if (currentEpoch > 0) {
+      // shift the bins: the newer bin becomes the older one, a fresh bin is started
+      if (neuronFreq[0]) {
+        delete neuronFreq[0];
+        neuronFreq[0] = neuronFreq[1];
+      }else{
+        neuronFreq[0] = new map<int, int>();
+      }
+      neuronFreq[1] = new map<int, int>();
+
+      // read all spikes in the correct time window (the queue is shared with the reader thread)
+      pthread_mutex_lock(&incomingSpikeLock);
+      while ((!incomingSpikes.empty()) && (incomingSpikes.front().get<0>() <= currentEpoch * epochDuration)) {
+        // drop event out of queue
+        SpikeEvent se = incomingSpikes.front();
+        double time = se.get<0>();
+        int neuron = se.get<1>();
+        incomingSpikes.pop();
+
+        // check if it belongs to the previous bin (and ignore it if this is the case)
+        if (time < (currentEpoch - 1) * epochDuration) {
+          fprintf(stderr, "WARN: spike reading thread to slow; unprocessed spike of the past discovered\n%f\t%f\t%d\t%f\n", time, (double) (currentEpoch - 1) * epochDuration, currentEpoch, epochDuration);
+          continue;
+        }
+
+        // increment the frequency counter (relies on int being default constructable to value 0)
+        (*neuronFreq[1])[neuron]++;
+      }
+
+      pthread_mutex_unlock(&incomingSpikeLock);
+    }
+
+    // proceed the global state to keep it in sync with the simulator's global state
+    // the local dopamin level is kept separately and aged only one epochDuration to
+    // avoid oscillation effects in dopamin level
+    msg_process(msg, 1.5 * epochDuration);
+    dopamin_level *= exp( - epochDuration / msg.dopamin_tau );
+
+    // select if the reward takes place: neuron 0 spiked in the older bin, neuron 1 in the newer
+    if ((currentEpoch > 1) && ((*neuronFreq[0])[0] > 0) && ((*neuronFreq[1])[1] > 0)) {
+      dopamin_level += da_single_reward;
+      fprintf(fd_performance_out, "+");
+    }else{
+      fprintf(fd_performance_out, "-");
+    }
+
+    if (currentEpoch > 1) {
+      // complete the performance line: dopamin level and the two spike counts tested above
+      fprintf(fd_performance_out, "\t%f\t%d\t%d\n", dopamin_level, (*neuronFreq[0])[0], (*neuronFreq[1])[1]);
+    }else{
+      // fake output as actual data is not available, yet
+      fprintf(fd_performance_out, "\t%f\t%d\t%d\n", dopamin_level, (int) 0, (int) 0);
+    }
+
+    // set the new DA level
+    msg.dopamin_level = dopamin_level;
+
+    // print new global state
+    // (do this even if there has been no evaluation of the performance yet,
+    //  because it is necessary for the simulator to proceed)
+
+    fprintf(fd_global_out, "%f, ", ((double) currentEpoch + 2.5) * epochDuration);
+    msg_print(msg, fd_global_out);
+    fprintf(fd_global_out, "\n");
+    fflush(fd_global_out);
+  }
+
+  fclose(fd_trace_out);
+
+  // terminate child threads
+  pthread_cancel(thread_read);
+  pthread_cancel(thread_write);
+}
+
+// Reader thread body: parse "time, neuron, current" lines from t->fd_spike_in and
+// queue them in t->incomingSpikes (under incomingSpikeLock). Always returns NULL.
+void *read_spikes(Trainer *t) {
+  double lastSpike = -INFINITY; // used to check if the spikes are coming in order
+  char buf[128];
+
+  // read line by line (blocking); fgets() returning NULL covers both EOF and read
+  // errors, so a broken stream ends the loop instead of spinning on !feof() forever
+  while (fgets(buf, sizeof(buf), t->fd_spike_in) != NULL) {
+    // parse the input
+    double time, current;
+    int neuron;
+    if (sscanf(buf, "%lf, %d, %lf\n", &time, &neuron, &current) != 3) {
+      // format is wrong, stop (buf itself is passed: %s expects a char*)
+      fprintf(stderr, "ERROR: malformatted incoming spike:\n\t%s\n", buf);
+      return NULL;
+    }
+
+    if (lastSpike > time) {
+      fprintf(stderr, "WARN: out of order spike detected (coming from simulator)\n\t%f\t%d\n", time, neuron);
+      continue;
+    }
+
+    lastSpike = time;
+
+    // add the spike to the queue of spikes
+    pthread_mutex_lock(&(t->incomingSpikeLock));
+    t->incomingSpikes.push(boost::make_tuple(time, neuron, current));
+    pthread_mutex_unlock(&(t->incomingSpikeLock));
+  }
+
+  // we shouldn't reach this point in a non-error case
+  if (ferror(t->fd_spike_in)) {
+    fprintf(stderr, "ERROR: read error on incoming spike stream\n");
+  } else {
+    fprintf(stderr, "ERROR: EOF in incoming spike stream\n");
+  }
+  // TODO: kill entire programm
+  return NULL;
+}
+
+// Writer thread body: emit Poisson noise spikes ("time, neuron, current" lines) to
+// t->fd_spike_out until the time passes t->entireDuration, then close the stream.
+void *write_spikes(Trainer *t) {
+  // at the moment: generate noise until the file descriptor blocks
+  double time = 0.0;
+
+  // PAR HINT:
+  // loop until exactly one spike after the entire duration is sent out
+  // this will block on a full buffer of the file descriptor, keeping the thread busy early enough
+
+  /* // ---- send 100% dependent spike train ---
+  time = 0.005;
+  while (time <= t->entireDuration) {
+    fprintf(t->fd_spike_out, "%f, %d, %f\n", time, 0, 1.0);
+        time += 0.012;
+    fprintf(t->fd_spike_out, "%f, %d, %f\n", time, 1, 1.0);
+    time += 1.0;
+  }*/
+
+  // ---- send independent poisson noise ----
+  while (time <= t->entireDuration) {
+    // calc timing, intensity and destination of the spike
+    // HINT:
+    //   * log(...) is negative
+    //   * drand48() returns something in [0,1), to avoid log(0) we transform it to (0,1]
+    time -= log(1.0 - drand48()) / (t->freq * t->neurons);
+    int dst = rand() % t->neurons;
+    double current = t->voltage;
+
+    // send it to the simulator
+    fprintf(t->fd_spike_out, "%f, %d, %f\n", time, dst, current);
+  }
+
+  /*// ---- send independent poisson noise w/ increasing frequency ----
+  double blafoo = 0;
+  t->freq = 1.0;
+  while (time <= t->entireDuration) {
+    if (time - blafoo > 100.0) {
+      blafoo += 200.0;
+      t->freq += 1.0;
+      time += 100.0; // time jump to let ET recover to zero
+    }
+    // calc timing, intensity and destination of the spike
+    // HINT:
+    //   * log(...) is negative
+    //   * drand48() returns something in [0,1), to avoid log(0) we transform it to (0,1]
+    time -= log(1.0 - drand48()) / (t->freq * t->neurons);
+    int dst = rand() % t->neurons;
+    double current = t->voltage;
+
+    // send it to the simulator
+    fprintf(t->fd_spike_out, "%f, %d, %f\n", time, dst, current);
+  }*/
+
+  // close fd because fscanf sucks
+  fclose(t->fd_spike_out);
+  return NULL; // the declared void* result must be returned (falling off the end is undefined)
+}