/* This code is part of the tng compression routines. * * Written by Daniel Spangberg * Copyright (c) 2010, 2013, The GROMACS development team. * * * This program is free software; you can redistribute it and/or * modify it under the terms of the Revised BSD License. */ #include #include #include #include "../../include/compression/warnmalloc.h" #include "../../include/compression/bwt.h" #include "../../include/compression/lz77.h" /* This is a simple Lempel-Ziv-77 compressor. It has not been set up to remove every possible repeating pattern, but it might be better than simple RLE. Lempel-Ziv 77 with separate outputs for length, data, and offsets. */ #if 0 /* Sort the strings (similar to BWT) to find similar patterns in the input data. The output is an array with two values for each input value. The sorted index value is the first in each doublet. The second value is the "inverse" of the first value and can be used to find the locations of similar strings. */ static void sort_strings(unsigned int *vals, int nvals, unsigned int *output) { int i; int *indices=warnmalloc(2*nvals*sizeof *indices); unsigned int *nrepeat=warnmalloc(nvals*sizeof *nrepeat); int *warr=indices+nvals; if (nvals>0xFFFFFF) { fprintf(stderr,"BWT cannot pack more than %d values.\n",0xFFFFFF); exit(1); } /* Also note that repeat pattern k (kmax) cannot be larger than 255. */ for (i=0; i=1; k--) { try_next_k: if (k>=1) { for (j=k; jmaxrepeat) new_j=j; if ((new_j>good_j) || ((new_j==good_j) && (knvals) repeat=nvals; nrepeat[i+m]=((unsigned int) (good_k)) | (((unsigned int) (repeat))<<8); } /* If no repetition was found for this value signal that here. */ if (!nrepeat[i]) nrepeat[i+m]=257U; /* This is 1<<8 | 1 */ } } /* Sort cyclic shift matrix. */ Ptngc_bwt_merge_sort_inner(indices,nvals,vals,0,nvals,nrepeat,warr); /* Form output. */ for (i=0; iNUM_PREVIOUS) previous[(NUM_PREVIOUS+3)*v]=NUM_PREVIOUS; previous[(NUM_PREVIOUS+3)*v+3+previous[(NUM_PREVIOUS+3)*v+1]]=i; previous[(NUM_PREVIOUS+3)*v+1]++; if (previous[(NUM_PREVIOUS+3)*v+1]>=NUM_PREVIOUS) previous[(NUM_PREVIOUS+3)*v+1]=0; } previous[(NUM_PREVIOUS+3)*v+2]=i; } void Ptngc_comp_to_lz77(unsigned int *vals, const int nvals, unsigned int *data, int *ndata, unsigned int *len, int *nlens, unsigned int *offsets, int *noffsets) { int noff=0; int ndat=0; int nlen=0; int i,j; int *previous=warnmalloc(0x20000*(NUM_PREVIOUS+3)*sizeof *previous); #if 0 unsigned int *info=warnmalloc(2*nvals*sizeof *info); sort_strings(vals,nvals,info); #endif for (i=0; i<0x20000; i++) { previous[(NUM_PREVIOUS+3)*i]=0; /* Number of items in a circular buffer */ previous[(NUM_PREVIOUS+3)*i+1]=0; /* Pointer to beginning of circular buffer. */ previous[(NUM_PREVIOUS+3)*i+2]=-2; /* Last offset that had this value. -2 is really never... */ } for (i=0; i=firstoffset) { for (k=0; i+klargest_len) && ((k>=(i-j)+16) || ((k>4) && (i-j==1)))) { largest_len=k; largest_offset=j; } } j++; } } #if 0 /* Search in sorted string buffer too. */ kmin=info[i*2+1]-MAX_STRING_SEARCH; kmax=info[i*2+1]+MAX_STRING_SEARCH; if (kmin<0) kmin=0; if (kmax>=nvals) kmax=nvals; for (k=kmin; k=i)) { for (m=0; i+mlargest_len) && (m>4) && (m+2>=(i-s))) { largest_len=m; largest_offset=s; #if 0 fprintf(stderr,"Offset: %d %d\n",m,i-s); #endif } } } #endif /* Check how to write this info. */ if (largest_len>MAX_LEN) largest_len=MAX_LEN; if (largest_len) { if (i-largest_offset==1) { data[ndat++]=0; } else { data[ndat++]=1; offsets[noff++]=i-largest_offset; } len[nlen++]=largest_len; #if 0 fprintf(stderr,"l:o: %d:%d data=%d i=%d\n",largest_len,i-largest_offset,ndat,i); fflush(stderr); #endif #if 0 fprintf(stderr,"Found largest len %d at %d.\n",largest_len,i-largest_offset); #endif /* Add these values to the circular buffer. */ for (k=0; k=nvals) { fprintf(stderr,"too many vals.\n"); exit(EXIT_FAILURE); } i++; } } else vals[i++]=v-2; } }