From b86aa1b30de15131ab61325d733e70d6deeaf0be Mon Sep 17 00:00:00 2001 From: Jan Huwald Date: Fri, 14 Jun 2013 13:45:59 +0200 Subject: refractor, change cycle identification Major changes: - compute cycle id (cycle minimum) in parallel to future state - skip computation of cycle stats for non-canonical cycles Minor changes: - move computation of several arrays around to improve locality and reduce random memory reads - distinguish between State and StateIter: the later can contain maxState + 1 and is used in loops and as pointer. - rename maxState to numState, add acutal maxState - time performance of memory allocation diff --git a/cacount.cpp b/cacount.cpp index 61c8a67..3657687 100644 --- a/cacount.cpp +++ b/cacount.cpp @@ -22,9 +22,12 @@ using boost::optional; #define BIT_WIDTH 16 #endif -typedef typename boost::uint_t::least State; -const State logState = BIT_WIDTH; -const State maxState = (State) 1 << logState; +const uint8_t logState = BIT_WIDTH; +typedef typename boost::uint_t::least State; +typedef typename boost::uint_t::least StateIter; +const StateIter numState = (StateIter) 1 << logState; +const StateIter nullState = ~((StateIter) 0); +const State maxState = ~((State) 0); bitset<8> rule(110); @@ -38,20 +41,20 @@ State update(State s) { return r; } -typedef array Trans; -typedef array pbitset; +typedef array Trans; +typedef array pbitset; void iterState(function f, optional msg = optional(), bool parallel = false) { PerfPrinter perfPrinter(msg); int numThreads=1; if (parallel) { - numThreads = min((State) thread::hardware_concurrency(), maxState); + numThreads = min(thread::hardware_concurrency(), numState); } list tasks; for (int t=0; t f, optional msg = optional } } -void init(Trans &t) { - iterState([&](int s) { +void init(Trans &t, Trans &c, pbitset &reachable) { + iterState([&](State s) { t[s] = update(s); + c[s] = s; + reachable[t[s]] = 1; }, (string) "single step transition table", true); } -void findCycle(Trans &t, Trans &c, pbitset &reachable) { - // compute reachability - iterState([&](int s) { - reachable[t[s]] = 1; - }, (string) "reachability", true); - // forward to t=maxState; now every state is in a cycle - iterTrans(logState, [&](int s) { - t[s] = t[t[s]]; - }, (string) "fwd time"); - // compute loop id (lowest occuring state no): go through the loop again and - // record the lowest occuring state number - iterState([&](int s) { - c[s] = t[s]; - }, (string) "init cycles", true); - iterTrans(logState, [&](int s) { - c[s] = min(c[s], c[t[s]]); - t[s] = t[t[s]]; - }, (string) "cycles"); +void findCycle(Trans &t, Trans &c) { + // forward to t=numState; now every state is in a cycle + iterTrans(logState, [&](State s) { + State n = t[s]; + t[s] = t[n]; + c[s] = min(c[s], c[n]); + // TODO: test if writing only when value changed improves + // performance by reducing cache coherency pressure + // TODO: detect if anything change, skip later rounds then + }, (string) "fwd time", true); + // Transients may have a cycle id (minimum state) that is on the + // transient (not in the cycle) and thus different from the cycle id + // in the cycle. Thus we copy the cycle id from a state in the cycle + // to all states in the basin. + iterState([&](State s) { + c[s] = c[t[s]]; + }, (string) "fix transient cycle id", true); } -void cycleStat(Trans &c, pbitset &reachable) { +State canonize(State s) { + // TODO: test if a variant base on clz is faster + State cs = s; + for (int i=0; i(cs, (((s<>(logState-i))) & maxState)); + return cs; +}; + +void cycleStat(Trans &t, Trans &c, pbitset &reachable) { struct Stat { - State basin, len, eden, minState; - Stat() : basin(0), len(0), eden(0), minState(maxState) {} + State basin, len, eden, cycleRed; + explicit Stat() : basin(0), len(0), eden(0), cycleRed(0) {} }; - map cyc; + map cycp; + vector cycs; + // Is this cycle the canonical one? // How big is the basin of attraction? // How many garden of eden states does it contain? - iterState([&](int s) { - cyc[c[s]].basin++; - if (!reachable[s]) - cyc[c[s]].eden++; - }, (string) "basin & eden"); + { PerfPrinter perfPrinter((string) "cycle count"); + iterState([&](State s) { + auto cyc = c[s]; + auto i = cycp.find(cyc); + if (i == cycp.end()) { + if (cyc == canonize(cyc)) { + cycs.emplace_back(); + i = cycp.insert(make_pair(cyc, cycs.size() - 1)).first; + }else{ + i = cycp.insert(make_pair(cyc, nullState)).first; + } + } + auto si = i->second; + + if (si != nullState) { + cycs[si].basin++; + if (!reachable[s]) + cycs[si].eden++; + } + }, (string) "basin & eden"); } // how long is the cycle, what is the actual minimal state { PerfPrinter perfPrinter((string) "cycle length"); - for (auto i : cyc) { - Stat &s = cyc[i.first]; + for (auto i : cycp) { + if (i.second == nullState) continue; + auto &stat = cycs[i.second]; State cur, start; - cur = start = c[i.first]; + cur = start = t[i.first]; do { - s.len++; - s.minState = min(s.minState, cur); - cur=update(cur); + stat.len++; + cur = update(cur); } while (cur != start); - }} - - // find duplicates cycles (only bitshifted) - map> ccyc; - { PerfPrinter perfPrinter((string) "find duplicate cycles"); - auto canonize = [](State s) { - State cs = s; - for (State i=0; i>(logState-i))) & (maxState - 1))); - return cs; - }; - for (auto i : cyc) { - ccyc[canonize(i.second.minState)].insert(i.first); + // TODO: parallelize loop }} // print it - for (auto i : ccyc) { - Stat &s = cyc[*(i.second.begin())]; + for (auto i : cycp) { + if (i.second == nullState) continue; + auto &s = cycs[i.second]; cout << bitset(i.first) << "\t" - << i.second.size() << "\t" + << s.cycleRed << "\t" << s.len << "\t" << s.basin << "\t" << s.eden << "\t" @@ -167,12 +185,16 @@ int main(int argc, char **argv) { printTraj(atoi(argv[3]), atoi(argv[4])); } if (!strcmp(argv[2], "cycle")) { - Trans *t = mmalloc(), - *c = mmalloc(); - pbitset *r = mmalloc(); - init(*t); - findCycle(*t, *c, *r); - cycleStat(*c, *r); + Trans *t, *c; + pbitset *r; + { PerfPrinter perfPrinter((string) "allocating memory"); + t = mmalloc(); + c = mmalloc(); + r = mmalloc(); + } + init(*t, *c, *r); + findCycle(*t, *c); + cycleStat(*t, *c, *r); } return 0; } -- cgit v0.10.1