diff options
author | Jan Huwald <jh@sotun.de> | 2013-06-24 09:20:54 (GMT) |
---|---|---|
committer | Jan Huwald <jh@sotun.de> | 2013-06-24 09:20:54 (GMT) |
commit | 21d9e26dfb72ba6d0fa4f3f40a439e51d943d349 (patch) | |
tree | 9d19dd79427c411cd8ac8f17cd5465158d838f52 | |
parent | aac019848c8813fe9dff3a8e9128c418f1225cda (diff) |
support canceling iterTrans if no work is done anymore
A bool is passed to the worker function to record whether any work has
been done in an iterState round. To prevent cache line bouncing, each
worker thread has its own cache-line-aligned bool; these per-thread
flags are ultimately merged.
-rw-r--r-- | cachepad.hpp | 15 | ||||
-rw-r--r-- | cacount.cpp | 33 |
2 files changed, 38 insertions, 10 deletions
diff --git a/cachepad.hpp b/cachepad.hpp new file mode 100644 index 0000000..beb7528 --- /dev/null +++ b/cachepad.hpp @@ -0,0 +1,15 @@ +#pragma once + +template<typename T> +struct cache_pad { + template<typename ...Arg> + cache_pad(Arg... args) : val(args...) {} + + operator T () { return val; } + T& operator () () { return val; } + + union { + T val; + char pad[(sizeof(T) + 63) / 64 * 64]; + }; +} __attribute__ ((aligned (64))); // TODO: port to C++11 alignas once GCC support is old enough diff --git a/cacount.cpp b/cacount.cpp index 882e4a6..f135936 100644 --- a/cacount.cpp +++ b/cacount.cpp @@ -12,6 +12,7 @@ #include <thread> #include <boost/integer.hpp> +#include "cachepad.hpp" #include "mmalloc.hpp" #include "timer.hpp" @@ -44,31 +45,43 @@ State update(State s) { typedef array<State, numState> Trans; typedef array<uint8_t, numState> pbitset; -void iterState(function<void(int)> f, optional<string> msg = optional<string>(), bool parallel = false) { +bool iterStateP(function<void(State, bool&)> f, optional<string> msg = optional<string>(), bool parallel = false, bool skipWorkTest = false) { PerfPrinter perfPrinter(msg); - int numThreads=1; - if (parallel) { + int numThreads = 1; + if (parallel) numThreads = min<uint64_t>(thread::hardware_concurrency(), numState); - } + cache_pad<bool> *perThreadWorked = new cache_pad<bool>[numThreads]; list<thread*> tasks; for (int t=0; t<numThreads; t++) { + perThreadWorked[t]() = false; tasks.push_front(new thread([=]{ for (StateIter s = numState / numThreads * t; s < ((t == numThreads - 1) ? 
numState : (numState / numThreads * (t+1))); s++) - f(s); + f(s, perThreadWorked[t]()); })); } - for (; !tasks.empty(); tasks.front()->join(), delete tasks.front(), tasks.pop_front()); + for (; !tasks.empty(); tasks.front()->join(), delete tasks.front(), tasks.pop_front()); + bool worked = skipWorkTest; + for (int t=0; t<numThreads; t++) + worked |= perThreadWorked[t]; + return worked; +} + +bool iterState(function<void(State)> f, optional<string> msg = optional<string>(), bool parallel = false) { + return iterStateP([=](State s, bool &) { f(s); }, msg, parallel, true); } -void iterTrans(int times, function<void(int)> f, optional<string> msg = optional<string>(), bool parallel = false) { +void iterTransP(int times, function<void(State, bool&)> f, optional<string> msg = optional<string>(), bool parallel = false, bool skipWorkTest = false) { PerfPrinter perfPrinter(msg); auto msg2 = [=,&msg] (int i) { return msg ? (*msg + string(" ") + to_string(times-i) + string("/") + to_string(times)) : msg; }; - while (times--) { - iterState(f, msg2(times), parallel); - } + while (times--) + if (not iterStateP(f, msg2(times), parallel, skipWorkTest)) return; +} + +void iterTrans(int times, function<void(State)> f, optional<string> msg = optional<string>(), bool parallel = false) { + iterTransP(times, [=](State s, bool &) { f(s); }, msg, parallel, true); } void init(Trans &t, Trans &c, pbitset &reachable) { |