From f06c74f26a3e3b9b6225c70fda283a97b0494919 Mon Sep 17 00:00:00 2001 From: Jan Huwald Date: Mon, 3 Jun 2013 17:02:01 +0200 Subject: more parallelism, cheap race check Assure that at last each parallel thread processes at least maxWorkdSize elements. This garantues that even for packed arrays of bit-sized elements that thread boundaries lie on machine word boundaries. diff --git a/cacount.cpp b/cacount.cpp index 295a40a..3cd755c 100644 --- a/cacount.cpp +++ b/cacount.cpp @@ -23,6 +23,11 @@ typedef uint32_t State; const State logState = BIT_WIDTH; const State maxState = 1 << logState; +// # of bits of largest memory fetch issued; machine-specific; used to +// garantue that sub-byte sized access of different threads never +// address the same word +const uint maxWordSize = 128; + bitset<8> rule(110); State update(State s) { @@ -37,7 +42,12 @@ State update(State s) { typedef array Trans; -void iterState(function f, int numThreads=1) { +void iterState(function f, bool parallel = false) { + int numThreads=1; + if (parallel) { + numThreads = min(thread::hardware_concurrency(), + maxState / maxWordSize); + } list tasks; for (int t=0; t f, int numThreads=1) { for (; !tasks.empty(); tasks.front()->join(), delete tasks.front(), tasks.pop_front()); } -void iterTrans(int times, function f, char *msg = nullptr) { +void iterTrans(int times, function f, char *msg = nullptr, bool parallel = false) { if (msg) cerr << msg << times << " left \r"; while (times--) { - iterState(f); + iterState(f, parallel); if (msg) cerr << msg << times << " left \r"; } if (msg) cerr << " \r"; @@ -61,22 +71,21 @@ void iterTrans(int times, function f, char *msg = nullptr) { void init(Trans &t) { iterState([&](int s) { - t[s] = update(s); }, - thread::hardware_concurrency()); + t[s] = update(s); }, true); } void findCycle(Trans &t, Trans &c, bitset &reachable) { // compute reachability iterState([&](int s) { reachable[t[s]] = 1; - }); + }, true); // forward to t=maxState; now every state is in a cycle iterTrans(logState, [&](int s) { t[s] = t[t[s]]; }, (char*) "fwd time: "); // compute loop id (lowest occuring state no): go through the loop again and // record the lowest occuring state number iterState([&](int s) { - c[s] = t[s]; }); + c[s] = t[s]; }, true); iterTrans(logState, [&](int s) { c[s] = min(c[s], c[t[s]]); t[s] = t[t[s]]; }, (char*) "cycles: "); -- cgit v0.10.1