From 1a09f345860834fb1c2005e65948fa296b201e8b Mon Sep 17 00:00:00 2001 From: Magnus Lundborg Date: Mon, 21 Jul 2014 15:56:54 +0200 Subject: Improved TNG compression speed. Change-Id: I71c66c6b534cb402048dcd75e008d3db4bd3fb71 diff --git a/src/compression/bwt.c b/src/compression/bwt.c index 66d3ecf..681d66b 100644 --- a/src/compression/bwt.c +++ b/src/compression/bwt.c @@ -1,7 +1,7 @@ /* This code is part of the tng compression routines. * - * Written by Daniel Spangberg - * Copyright (c) 2010, 2013, The GROMACS development team. + * Written by Daniel Spangberg and Magnus Lundborg + * Copyright (c) 2010, 2013-2014 The GROMACS development team. * * * This program is free software; you can redistribute it and/or @@ -162,8 +162,9 @@ void Ptngc_comp_to_bwt(unsigned int *vals, int nvals, indices[i]=i; /* Find the length of the initial repeating pattern for the strings. */ /* First mark that the index does not have a found repeating string. */ - for (i=0; ipack_temporary_bits; - unsigned int pack_temporary=coder_inst->pack_temporary; - while (pack_temporary_bits>=8) + while (coder_inst->pack_temporary_bits>=8) { - unsigned int mask=~(0xFFU<<(pack_temporary_bits-8)); - unsigned char out=(unsigned char)(pack_temporary>>(pack_temporary_bits-8)); + unsigned int mask; + unsigned char out; + coder_inst->pack_temporary_bits-=8; + mask=~(0xFFU<<(coder_inst->pack_temporary_bits)); + out=(unsigned char)(coder_inst->pack_temporary>>(coder_inst->pack_temporary_bits)); **output=out; (*output)++; - pack_temporary_bits-=8; - pack_temporary&=mask; + coder_inst->pack_temporary&=mask; } - coder_inst->pack_temporary_bits=pack_temporary_bits; - coder_inst->pack_temporary=pack_temporary; } void DECLSPECDLLEXPORT Ptngc_write_pattern(struct coder *coder_inst, unsigned int pattern, @@ -102,11 +100,11 @@ void DECLSPECDLLEXPORT Ptngc_write32bits(struct coder *coder_inst,unsigned int v while (nbits>8) { /* Make room for the bits. */ + nbits-=8; coder_inst->pack_temporary<<=8; coder_inst->pack_temporary_bits+=8; - coder_inst->pack_temporary|=(value&mask)>>(nbits-8); + coder_inst->pack_temporary|=(value&mask)>>(nbits); Ptngc_out8bits(coder_inst,output_ptr); - nbits-=8; mask>>=8; } if (nbits) @@ -246,7 +244,6 @@ unsigned char DECLSPECDLLEXPORT *Ptngc_pack_array(struct coder *coder_inst, { int item=input[k*3*natoms+i*3+j]; pval[cnt++]=(unsigned int)(item+most_negative); - } if (speed>=5) bwlzh_compress(pval,n,output+4,length); diff --git a/src/compression/dict.c b/src/compression/dict.c index 24e6ae7..fdfe7a0 100644 --- a/src/compression/dict.c +++ b/src/compression/dict.c @@ -1,7 +1,7 @@ /* This code is part of the tng compression routines. * - * Written by Daniel Spangberg - * Copyright (c) 2010, 2013, The GROMACS development team. + * Written by Daniel Spangberg and Magnus Lundborg + * Copyright (c) 2010, 2013-2014 The GROMACS development team. * * * This program is free software; you can redistribute it and/or @@ -17,6 +17,7 @@ void Ptngc_comp_canonical_dict(unsigned int *dict, int *ndict) int i; for (i=0; i<0x20004; i++) dict[i]=i; + *ndict=0x20004; } @@ -26,18 +27,19 @@ void Ptngc_comp_make_dict_hist(unsigned int *vals, int nvals, { int i; int j=0; - for (i=0; i<0x20004; i++) - hist[i]=0; - for (i=0; i<0x20004; i++) - dict[i]=i; + + memset(hist, 0, sizeof(unsigned int)*0x20004); + for (i=0; i +#include #include "../../include/compression/warnmalloc.h" #include "../../include/compression/mtf.h" @@ -63,8 +64,9 @@ void Ptngc_comp_conv_to_mtf_partial(unsigned int *vals, int nvals, { unsigned char *tmp=warnmalloc(nvals*2); int i, j; - for (i=0; i #include +#include #include "../../include/compression/tng_compress.h" @@ -31,8 +32,14 @@ #endif /* gcc & x86_64 */ #endif /* TRAJNG X86 GCC INLINE MULDIV */ +#ifdef USE_WINDOWS +#define TNG_INLINE __inline +#else +#define TNG_INLINE inline +#endif + /* Multiply two 32 bit unsigned integers returning a 64 bit unsigned value (in two integers) */ -void Ptngc_widemul(unsigned int i1, unsigned int i2, unsigned int *ohi, unsigned int *olo) +TNG_INLINE void Ptngc_widemul(unsigned int i1, unsigned int i2, unsigned int *ohi, unsigned int *olo) { #if defined(TRAJNG_X86_GCC_INLINE_MULDIV) __asm__ __volatile__ ("mull %%edx\n\t" @@ -99,7 +106,7 @@ void Ptngc_widemul(unsigned int i1, unsigned int i2, unsigned int *ohi, unsigned /* Divide a 64 bit unsigned value in hi:lo with the 32 bit value i and return the result in result and the remainder in remainder */ -void Ptngc_widediv(unsigned int hi, unsigned int lo, unsigned int i, unsigned int *result, unsigned int *remainder) +TNG_INLINE void Ptngc_widediv(unsigned int hi, unsigned int lo, unsigned int i, unsigned int *result, unsigned int *remainder) { #if defined(TRAJNG_X86_GCC_INLINE_MULDIV) __asm__ __volatile__ ("divl %%ecx\n\t" @@ -163,7 +170,7 @@ void Ptngc_widediv(unsigned int hi, unsigned int lo, unsigned int i, unsigned in /* Add a unsigned int to a largeint. j determines which value in the largeint to add v1 to. */ -static void largeint_add_gen(unsigned int v1, unsigned int *largeint, int n, int j) +TNG_INLINE static void largeint_add_gen(const unsigned int v1, unsigned int *largeint, const int n, int j) { /* Add with carry. unsigned ints in C wrap modulo 2**bits when "overflowed". */ unsigned int v2=(v1+largeint[j])&0xFFFFFFFFU; /* Add and cap at 32 bits */ @@ -184,46 +191,50 @@ static void largeint_add_gen(unsigned int v1, unsigned int *largeint, int n, int } /* Add a unsigned int to a largeint. */ -void Ptngc_largeint_add(unsigned int v1, unsigned int *largeint, int n) +void Ptngc_largeint_add(const unsigned int v1, unsigned int *largeint, const int n) { largeint_add_gen(v1,largeint,n,0); } /* Multiply v1 with largeint_in and return result in largeint_out */ -void Ptngc_largeint_mul(unsigned int v1, unsigned int *largeint_in, unsigned int *largeint_out, int n) +TNG_INLINE void Ptngc_largeint_mul(const unsigned int v1, unsigned int *largeint_in, unsigned int *largeint_out, const int n) { int i; - for (i=0; i64 mul */ largeint_add_gen(lo,largeint_out,n,i); - if (i+164 mul */ + largeint_add_gen(lo,largeint_out,n,i); + } } /* Return the remainder from dividing largeint_in with v1. Result of the division is returned in largeint_out */ -unsigned int Ptngc_largeint_div(unsigned int v1, unsigned int *largeint_in, unsigned int *largeint_out, int n) +TNG_INLINE unsigned int Ptngc_largeint_div(const unsigned int v1, unsigned int *largeint_in, unsigned int *largeint_out, const int n) { unsigned int result,remainder=0; int i; - unsigned int hi, lo; + unsigned int hi; /* Boot */ hi=0U; i=n; while (i) { - lo=largeint_in[i-1]; - Ptngc_widediv(hi,lo,v1,&result,&remainder); - largeint_out[i-1]=result; - hi=remainder; i--; + Ptngc_widediv(hi,largeint_in[i],v1,&result,&remainder); + largeint_out[i]=result; + hi=remainder; } return remainder; } diff --git a/src/compression/xtc2.c b/src/compression/xtc2.c index bbd45e9..e5b688a 100644 --- a/src/compression/xtc2.c +++ b/src/compression/xtc2.c @@ -1,7 +1,7 @@ /* This code is part of the tng compression routines. * - * Written by Daniel Spangberg - * Copyright (c) 2010, 2013, The GROMACS development team. + * Written by Daniel Spangberg and Magnus Lundborg + * Copyright (c) 2010, 2013-2014 The GROMACS development team. * * * This program is free software; you can redistribute it and/or @@ -26,6 +26,12 @@ /* Generated by gen_magic.py */ #define MAX_MAGIC 92 +#ifdef USE_WINDOWS +#define TNG_INLINE __inline +#else +#define TNG_INLINE inline +#endif + static unsigned int magic[MAX_MAGIC]={ 2U, 3U, 4U, 5U, 6U, 8U, 10U, 12U, @@ -159,20 +165,43 @@ static const double iflipgaincheck=0.89089871814033927; /* 1./(2**(1./6)) */ #define SHOWIT #endif +#ifdef USE_WINDOWS +#define TNG_INLINE __inline +#else +#define TNG_INLINE inline +#endif + int Ptngc_magic(unsigned int i) { return magic[i]; } -int Ptngc_find_magic_index(unsigned int maxval) +TNG_INLINE int Ptngc_find_magic_index(const unsigned int maxval) { - int i=0; + int i; + + if(maxval > magic[MAX_MAGIC/4]) + { + if(maxval > magic[MAX_MAGIC/2]) + { + i = MAX_MAGIC/2 + 1; + } + else + { + i = MAX_MAGIC/4 + 1; + } + } + else + { + i = 0; + } + while (magic[i]<=maxval) i++; return i; } -static unsigned int positive_int(int item) +TNG_INLINE static unsigned int positive_int(const int item) { int s=0; if (item>0) @@ -182,7 +211,7 @@ static unsigned int positive_int(int item) return s; } -static int unpositive_int(int val) +TNG_INLINE static int unpositive_int(const int val) { int s=(val+1)/2; if ((val%2)==0) @@ -438,23 +467,26 @@ static int compute_magic_bits(int *index) /* Convert a sequence of (hopefully) small positive integers using the base pointed to by index (x base, y base and z base can be different). The largest number of integers supported is 18 (29 to handle/detect overflow) */ -static void trajcoder_base_compress(int *input, int n, int *index, unsigned char *result) +static void trajcoder_base_compress(int *input, const int n, int *index, unsigned char *result) { unsigned int largeint[19]; unsigned int largeint_tmp[19]; - int i,j; - for (i=0; i<19; i++) - largeint[i]=0U; + int i, j; + + memset(largeint, 0U, sizeof(unsigned int) * 19); - for (i=0; i 0) { - if (i!=0) - { - /* We must do the multiplication of the largeint with the integer base */ - Ptngc_largeint_mul(magic[index[i%3]],largeint,largeint_tmp,19); - for (j=0; j<19; j++) - largeint[j]=largeint_tmp[j]; - } + Ptngc_largeint_add(input[0],largeint,19); + } + + for (i=1; imaxint[j]) - maxint[j]=input[i*3+j]; - if (input[i*3+j]maxint[j]) + maxint[j]=input[i*3+j]; + if (input[i*3+j]0) @@ -74,7 +80,7 @@ static unsigned int positive_int(int item) return s; } -static int unpositive_int(int val) +TNG_INLINE static int unpositive_int(int val) { int s=(val+1)/2; if ((val%2)==0) @@ -574,15 +580,15 @@ static int base_bytes(unsigned int base, int n) unsigned int largeint[MAXMAXBASEVALS+1]; unsigned int largeint_tmp[MAXMAXBASEVALS+1]; int numbytes=0; - for (i=0; i0 && iinstr