summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Huwald <jh@sotun.de>2013-06-03 15:02:01 (GMT)
committerJan Huwald <jh@sotun.de>2013-06-03 15:02:01 (GMT)
commitf06c74f26a3e3b9b6225c70fda283a97b0494919 (patch)
tree71da03eac4b4e15d1cf5579eb86006b2902c8870
parent7500d7130461aa781527ba30a63726cd9cea9224 (diff)
more parallelism, cheap race check
Assure that at last each parallel thread processes at least maxWorkdSize elements. This garantues that even for packed arrays of bit-sized elements that thread boundaries lie on machine word boundaries.
-rw-r--r--cacount.cpp23
1 files changed, 16 insertions, 7 deletions
diff --git a/cacount.cpp b/cacount.cpp
index 295a40a..3cd755c 100644
--- a/cacount.cpp
+++ b/cacount.cpp
@@ -23,6 +23,11 @@ typedef uint32_t State;
const State logState = BIT_WIDTH;
const State maxState = 1 << logState;
+// # of bits of largest memory fetch issued; machine-specific; used to
+// garantue that sub-byte sized access of different threads never
+// address the same word
+const uint maxWordSize = 128;
+
bitset<8> rule(110);
State update(State s) {
@@ -37,7 +42,12 @@ State update(State s) {
typedef array<State, maxState> Trans;
-void iterState(function<void(int)> f, int numThreads=1) {
+void iterState(function<void(int)> f, bool parallel = false) {
+ int numThreads=1;
+ if (parallel) {
+ numThreads = min(thread::hardware_concurrency(),
+ maxState / maxWordSize);
+ }
list<thread*> tasks;
for (int t=0; t<numThreads; t++) {
tasks.push_front(new thread([=]{
@@ -50,10 +60,10 @@ void iterState(function<void(int)> f, int numThreads=1) {
for (; !tasks.empty(); tasks.front()->join(), delete tasks.front(), tasks.pop_front());
}
-void iterTrans(int times, function<void(int)> f, char *msg = nullptr) {
+void iterTrans(int times, function<void(int)> f, char *msg = nullptr, bool parallel = false) {
if (msg) cerr << msg << times << " left \r";
while (times--) {
- iterState(f);
+ iterState(f, parallel);
if (msg) cerr << msg << times << " left \r";
}
if (msg) cerr << " \r";
@@ -61,22 +71,21 @@ void iterTrans(int times, function<void(int)> f, char *msg = nullptr) {
void init(Trans &t) {
iterState([&](int s) {
- t[s] = update(s); },
- thread::hardware_concurrency());
+ t[s] = update(s); }, true);
}
void findCycle(Trans &t, Trans &c, bitset<maxState> &reachable) {
// compute reachability
iterState([&](int s) {
reachable[t[s]] = 1;
- });
+ }, true);
// forward to t=maxState; now every state is in a cycle
iterTrans(logState, [&](int s) {
t[s] = t[t[s]]; }, (char*) "fwd time: ");
// compute loop id (lowest occuring state no): go through the loop again and
// record the lowest occuring state number
iterState([&](int s) {
- c[s] = t[s]; });
+ c[s] = t[s]; }, true);
iterTrans(logState, [&](int s) {
c[s] = min(c[s], c[t[s]]);
t[s] = t[t[s]]; }, (char*) "cycles: ");
contact: Jan Huwald // Impressum