diff options
author | Jan Huwald <jh@sotun.de> | 2013-06-14 11:45:59 (GMT) |
---|---|---|
committer | Jan Huwald <jh@sotun.de> | 2013-06-14 11:45:59 (GMT) |
commit | b86aa1b30de15131ab61325d733e70d6deeaf0be (patch) | |
tree | a5bc098665f8b19eaef0fd20f804475203ace2b9 | |
parent | 955be6351131ae86ceb9c5b05ebb41f22bbecfed (diff) |
refractor, change cycle identification
Major changes:
- compute cycle id (cycle minimum) in parallel to future state
- skip computation of cycle stats for non-canonical cycles
Minor changes:
- move computation of several arrays around to improve locality and
reduce random memory reads
- distinguish between State and StateIter: the later can contain
maxState + 1 and is used in loops and as pointer.
- rename maxState to numState, add acutal maxState
- time performance of memory allocation
-rw-r--r-- | cacount.cpp | 152 |
1 files changed, 87 insertions, 65 deletions
diff --git a/cacount.cpp b/cacount.cpp index 61c8a67..3657687 100644 --- a/cacount.cpp +++ b/cacount.cpp @@ -22,9 +22,12 @@ using boost::optional; #define BIT_WIDTH 16 #endif -typedef typename boost::uint_t<BIT_WIDTH + 1>::least State; -const State logState = BIT_WIDTH; -const State maxState = (State) 1 << logState; +const uint8_t logState = BIT_WIDTH; +typedef typename boost::uint_t<logState >::least State; +typedef typename boost::uint_t<logState + 1>::least StateIter; +const StateIter numState = (StateIter) 1 << logState; +const StateIter nullState = ~((StateIter) 0); +const State maxState = ~((State) 0); bitset<8> rule(110); @@ -38,20 +41,20 @@ State update(State s) { return r; } -typedef array<State, maxState> Trans; -typedef array<uint8_t, maxState> pbitset; +typedef array<State, numState> Trans; +typedef array<uint8_t, numState> pbitset; void iterState(function<void(int)> f, optional<string> msg = optional<string>(), bool parallel = false) { PerfPrinter perfPrinter(msg); int numThreads=1; if (parallel) { - numThreads = min((State) thread::hardware_concurrency(), maxState); + numThreads = min<uint64_t>(thread::hardware_concurrency(), numState); } list<thread*> tasks; for (int t=0; t<numThreads; t++) { tasks.push_front(new thread([=]{ - for (State s = maxState / numThreads * t; - s < ((t == numThreads - 1) ? maxState : (maxState / numThreads * (t+1))); + for (StateIter s = numState / numThreads * t; + s < ((t == numThreads - 1) ? numState : (numState / numThreads * (t+1))); s++) f(s); })); @@ -68,78 +71,93 @@ void iterTrans(int times, function<void(int)> f, optional<string> msg = optional } } -void init(Trans &t) { - iterState([&](int s) { +void init(Trans &t, Trans &c, pbitset &reachable) { + iterState([&](State s) { t[s] = update(s); + c[s] = s; + reachable[t[s]] = 1; }, (string) "single step transition table", true); } -void findCycle(Trans &t, Trans &c, pbitset &reachable) { - // compute reachability - iterState([&](int s) { - reachable[t[s]] = 1; - }, (string) "reachability", true); - // forward to t=maxState; now every state is in a cycle - iterTrans(logState, [&](int s) { - t[s] = t[t[s]]; - }, (string) "fwd time"); - // compute loop id (lowest occuring state no): go through the loop again and - // record the lowest occuring state number - iterState([&](int s) { - c[s] = t[s]; - }, (string) "init cycles", true); - iterTrans(logState, [&](int s) { - c[s] = min<State>(c[s], c[t[s]]); - t[s] = t[t[s]]; - }, (string) "cycles"); +void findCycle(Trans &t, Trans &c) { + // forward to t=numState; now every state is in a cycle + iterTrans(logState, [&](State s) { + State n = t[s]; + t[s] = t[n]; + c[s] = min<State>(c[s], c[n]); + // TODO: test if writing only when value changed improves + // performance by reducing cache coherency pressure + // TODO: detect if anything change, skip later rounds then + }, (string) "fwd time", true); + // Transients may have a cycle id (minimum state) that is on the + // transient (not in the cycle) and thus different from the cycle id + // in the cycle. Thus we copy the cycle id from a state in the cycle + // to all states in the basin. + iterState([&](State s) { + c[s] = c[t[s]]; + }, (string) "fix transient cycle id", true); } -void cycleStat(Trans &c, pbitset &reachable) { +State canonize(State s) { + // TODO: test if a variant base on clz is faster + State cs = s; + for (int i=0; i<logState; i++) + cs = min<State>(cs, (((s<<i) | (s>>(logState-i))) & maxState)); + return cs; +}; + +void cycleStat(Trans &t, Trans &c, pbitset &reachable) { struct Stat { - State basin, len, eden, minState; - Stat() : basin(0), len(0), eden(0), minState(maxState) {} + State basin, len, eden, cycleRed; + explicit Stat() : basin(0), len(0), eden(0), cycleRed(0) {} }; - map<State, Stat> cyc; + map<State, StateIter> cycp; + vector<Stat> cycs; + // Is this cycle the canonical one? // How big is the basin of attraction? // How many garden of eden states does it contain? - iterState([&](int s) { - cyc[c[s]].basin++; - if (!reachable[s]) - cyc[c[s]].eden++; - }, (string) "basin & eden"); + { PerfPrinter perfPrinter((string) "cycle count"); + iterState([&](State s) { + auto cyc = c[s]; + auto i = cycp.find(cyc); + if (i == cycp.end()) { + if (cyc == canonize(cyc)) { + cycs.emplace_back(); + i = cycp.insert(make_pair(cyc, cycs.size() - 1)).first; + }else{ + i = cycp.insert(make_pair(cyc, nullState)).first; + } + } + auto si = i->second; + + if (si != nullState) { + cycs[si].basin++; + if (!reachable[s]) + cycs[si].eden++; + } + }, (string) "basin & eden"); } // how long is the cycle, what is the actual minimal state { PerfPrinter perfPrinter((string) "cycle length"); - for (auto i : cyc) { - Stat &s = cyc[i.first]; + for (auto i : cycp) { + if (i.second == nullState) continue; + auto &stat = cycs[i.second]; State cur, start; - cur = start = c[i.first]; + cur = start = t[i.first]; do { - s.len++; - s.minState = min(s.minState, cur); - cur=update(cur); + stat.len++; + cur = update(cur); } while (cur != start); - }} - - // find duplicates cycles (only bitshifted) - map<State, set<State>> ccyc; - { PerfPrinter perfPrinter((string) "find duplicate cycles"); - auto canonize = [](State s) { - State cs = s; - for (State i=0; i<logState; i++) - cs = min(cs, (((s<<i) | (s>>(logState-i))) & (maxState - 1))); - return cs; - }; - for (auto i : cyc) { - ccyc[canonize(i.second.minState)].insert(i.first); + // TODO: parallelize loop }} // print it - for (auto i : ccyc) { - Stat &s = cyc[*(i.second.begin())]; + for (auto i : cycp) { + if (i.second == nullState) continue; + auto &s = cycs[i.second]; cout << bitset<logState>(i.first) << "\t" - << i.second.size() << "\t" + << s.cycleRed << "\t" << s.len << "\t" << s.basin << "\t" << s.eden << "\t" @@ -167,12 +185,16 @@ int main(int argc, char **argv) { printTraj(atoi(argv[3]), atoi(argv[4])); } if (!strcmp(argv[2], "cycle")) { - Trans *t = mmalloc<Trans>(), - *c = mmalloc<Trans>(); - pbitset *r = mmalloc<pbitset>(); - init(*t); - findCycle(*t, *c, *r); - cycleStat(*c, *r); + Trans *t, *c; + pbitset *r; + { PerfPrinter perfPrinter((string) "allocating memory"); + t = mmalloc<Trans>(); + c = mmalloc<Trans>(); + r = mmalloc<pbitset>(); + } + init(*t, *c, *r); + findCycle(*t, *c); + cycleStat(*t, *c, *r); } return 0; } |