diff options
author | Jan Huwald <jh@sotun.de> | 2013-06-03 15:02:01 (GMT) |
---|---|---|
committer | Jan Huwald <jh@sotun.de> | 2013-06-03 15:02:01 (GMT) |
commit | f06c74f26a3e3b9b6225c70fda283a97b0494919 (patch) | |
tree | 71da03eac4b4e15d1cf5579eb86006b2902c8870 | |
parent | 7500d7130461aa781527ba30a63726cd9cea9224 (diff) |
more parallelism, cheap race check
Assure that at last each parallel thread processes at least
maxWorkdSize elements. This garantues that even for packed arrays of
bit-sized elements that thread boundaries lie on machine word boundaries.
-rw-r--r-- | cacount.cpp | 23 |
1 files changed, 16 insertions, 7 deletions
diff --git a/cacount.cpp b/cacount.cpp index 295a40a..3cd755c 100644 --- a/cacount.cpp +++ b/cacount.cpp @@ -23,6 +23,11 @@ typedef uint32_t State; const State logState = BIT_WIDTH; const State maxState = 1 << logState; +// # of bits of largest memory fetch issued; machine-specific; used to +// garantue that sub-byte sized access of different threads never +// address the same word +const uint maxWordSize = 128; + bitset<8> rule(110); State update(State s) { @@ -37,7 +42,12 @@ State update(State s) { typedef array<State, maxState> Trans; -void iterState(function<void(int)> f, int numThreads=1) { +void iterState(function<void(int)> f, bool parallel = false) { + int numThreads=1; + if (parallel) { + numThreads = min(thread::hardware_concurrency(), + maxState / maxWordSize); + } list<thread*> tasks; for (int t=0; t<numThreads; t++) { tasks.push_front(new thread([=]{ @@ -50,10 +60,10 @@ void iterState(function<void(int)> f, int numThreads=1) { for (; !tasks.empty(); tasks.front()->join(), delete tasks.front(), tasks.pop_front()); } -void iterTrans(int times, function<void(int)> f, char *msg = nullptr) { +void iterTrans(int times, function<void(int)> f, char *msg = nullptr, bool parallel = false) { if (msg) cerr << msg << times << " left \r"; while (times--) { - iterState(f); + iterState(f, parallel); if (msg) cerr << msg << times << " left \r"; } if (msg) cerr << " \r"; @@ -61,22 +71,21 @@ void iterTrans(int times, function<void(int)> f, char *msg = nullptr) { void init(Trans &t) { iterState([&](int s) { - t[s] = update(s); }, - thread::hardware_concurrency()); + t[s] = update(s); }, true); } void findCycle(Trans &t, Trans &c, bitset<maxState> &reachable) { // compute reachability iterState([&](int s) { reachable[t[s]] = 1; - }); + }, true); // forward to t=maxState; now every state is in a cycle iterTrans(logState, [&](int s) { t[s] = t[t[s]]; }, (char*) "fwd time: "); // compute loop id (lowest occuring state no): go through the loop again and // record the lowest occuring state number iterState([&](int s) { - c[s] = t[s]; }); + c[s] = t[s]; }, true); iterTrans(logState, [&](int s) { c[s] = min(c[s], c[t[s]]); t[s] = t[t[s]]; }, (char*) "cycles: "); |