From 3f7d2140f99e61bea7c25396710fe4fd222a714d Mon Sep 17 00:00:00 2001 From: Jan Huwald Date: Fri, 14 Jun 2013 10:34:44 +0200 Subject: remove packed_array In addition to being broken by design, it was order_s_ of magnitude too slow. Adding cores to the computation increased runtime. diff --git a/cacount.cpp b/cacount.cpp index 7abda2f..1437c66 100644 --- a/cacount.cpp +++ b/cacount.cpp @@ -10,9 +10,9 @@ #include #include #include +#include #include "mmalloc.hpp" -#include "packed_array.hpp" #include "timer.hpp" using namespace std; @@ -22,15 +22,10 @@ using boost::optional; #define BIT_WIDTH 30 #endif -typedef uint64_t State; +typedef typename boost::uint_t::least State; const State logState = BIT_WIDTH; const State maxState = (State) 1 << logState; -// # of bits of largest memory fetch issued; machine-specific; used to -// garantue that sub-byte sized access of different threads never -// address the same word -const uint maxWordSize = 128; - bitset<8> rule(110); State update(State s) { @@ -43,15 +38,14 @@ State update(State s) { return r; } -typedef packed_array Trans; -typedef packed_array pbitset; +typedef array Trans; +typedef array pbitset; void iterState(function f, optional msg = optional(), bool parallel = false) { PerfPrinter perfPrinter(msg); int numThreads=1; if (parallel) { - numThreads = min((State) thread::hardware_concurrency(), - maxState / maxWordSize); + numThreads = min((State) thread::hardware_concurrency(), maxState); } list tasks; for (int t=0; t(), *c = mmalloc(); - packed_array *r = mmalloc>(); + pbitset *r = mmalloc(); init(*t); findCycle(*t, *c, *r); cycleStat(*c, *r); diff --git a/packed_array.hpp b/packed_array.hpp deleted file mode 100644 index f4fa041..0000000 --- a/packed_array.hpp +++ /dev/null @@ -1,63 +0,0 @@ -#pragma once - -#include -#include - -template struct bit_size; - -template::size> -struct packed_array { - typedef packed_array packed_array_t; - typedef uint64_t word_t; - const size_t word_sz = 8 * sizeof(word_t); - uint8_t 
data[(count * bit_sz + 7) / 8]; - - /// element access - - struct element { - element(word_t *base, uint shift) : base(base), shift(shift) {} - - T operator= (T val) { - word_t old_val, new_val; - do { - word_t mask = ~(((((word_t) 1) << bit_sz) - 1) << shift); - old_val = *((volatile word_t*) base); - new_val = (old_val & mask) ^ (val << shift); - } while (!__sync_bool_compare_and_swap(base, old_val, new_val)); - return val; - } - - operator T() { - word_t mask = (((((word_t) 1) << bit_sz) - 1) << shift); - return static_cast((*base & mask) >> shift); - } - - T operator() () { - return (T) (*this); - } - - word_t *base; - uint shift; - }; - - element operator[] (size_t i) { - size_t bit_addr = i * bit_sz; - return element((word_t*) (data + (bit_addr / 8)), - bit_addr % 8); - } - - /// simple iteration (for for-all loops) - - struct iterator { - packed_array_t &a; - size_t i; - - iterator(packed_array_t &a, size_t i) : a(a), i(i) {} - iterator& operator++() { ++i; return *this; } - element operator*() { return a[i]; } - bool operator != (iterator &o) { return (i != o.i) && (&a != &(o.a)); } - }; - - iterator begin() { return iterator(*this, 0); } - iterator end() { return iterator(*this, count); } -}; -- cgit v0.10.1