diff options
author | Jan Huwald <jh@sotun.de> | 2013-06-26 13:23:07 (GMT) |
---|---|---|
committer | Jan Huwald <jh@sotun.de> | 2013-06-26 13:23:07 (GMT) |
commit | ac11c3b784234d93cc10ca931a178bf50ee26faf (patch) | |
tree | 4b8bc9fc15a92a8828ea3abeb79d711a75eb4070 /keccak.cuh |
Throughput: 330 MB/s on one multiprocessor of a GTX680
Diffstat (limited to 'keccak.cuh')
-rw-r--r-- | keccak.cuh | 43 |
1 files changed, 43 insertions, 0 deletions
// File: keccak.cuh (added as a new file by this patch).
#pragma once

/* Keccak-like hashing carried out by 1024 threads.

   Each thread supplies the values to be hashed locally, so the data
   never leaves register memory. The input streams of the warps have to
   be synchronised: every warp must hash the same amount of data at the
   same time.

   The hash is built in two stages:
   1. Each warp computes a keccak hash over its own input stream until
      the end of the data is reached. 25 of the warp's 32 threads store
      one 32-bit word each.
   2. The per-warp hashes are streamed through shared memory to a single
      warp, which computes the Keccak hash over all of them.
*/

#include <inttypes.h>

namespace keccak {

// Number of 32-bit words that make up a 256-bit digest.
const uint32_t digest_words = 256 / 32;

// A finished digest, held as digest_words 32-bit words.
struct Result {
  uint32_t digest[digest_words];
};

// 25 words of 32 bits, addressable either as a 5x5 grid (a) or as a flat
// array (s). Not referenced by any declaration visible in this header.
union State {
  uint32_t a[5][5];
  uint32_t s[25];
};

// Hand-off area for the second stage: one digest slot for each of the
// 32 warps (1024 threads / 32 threads per warp).
// NOTE(review): meant to live in shared memory per the file comment; the
// struct itself carries no memory-space qualifier -- confirm at the call site.
struct SharedState {
  Result digests[1024 / 32];
};

// Per-thread state consisting of a single 32-bit word.
// NOTE(review): presumably the one state word this thread keeps in a
// register (see file comment) -- confirm against the implementation.
union RegisterState {
  uint32_t a;
};

// Device-side interface. The definitions are not part of this header, so
// the notes below are inferred from the names and the file comment only.

// Presumably resets s before hashing starts -- TODO confirm.
__device__ void init(RegisterState &s);

// Presumably absorbs one 32-bit word of input (data) into s -- TODO confirm.
__device__ void add(RegisterState &s, uint32_t data);

// Presumably completes the per-warp hash, combines the warp digests via ss
// and writes the final digest to dst -- TODO confirm.
__device__ void finish(RegisterState &s, SharedState &ss, Result &dst);

}  // namespace keccak