/* This code is part of the tng compression routines. * * Written by Daniel Spangberg * Copyright (c) 2010, 2013, The GROMACS development team. * * * This program is free software; you can redistribute it and/or * modify it under the terms of the Revised BSD License. */ #include #include #include #include "../../include/compression/warnmalloc.h" #include "../../include/compression/bwt.h" #if 0 #define SHOWIT #endif #if 0 #define SHOWIT2 #endif static int compare_index(int i1,int i2,int nvals,unsigned int *vals,unsigned int *nrepeat) { int i,j; for (i=0; i>8); int k1=(int)(nrepeat[i1]&0xFFU); int repeat2=(int)(nrepeat[i2]>>8); int k2=(int)(nrepeat[i2]&0xFFU); if ((repeat1>1) && (repeat2>1) && (k1==k2)) { int maxskip=0; /* Yes. Compare the repeating patterns. */ for (j=0; jv2) return 1; } /* The repeating patterns are equal. Skip as far as we can before continuing. */ maxskip=repeat1; if (repeat2vals[i2]) return 1; i1++; if (i1>=nvals) i1=0; i2++; if (i2>=nvals) i2=0; } } return 0; } void Ptngc_bwt_merge_sort_inner(int *indices, int nvals,unsigned int *vals, int start, int end, unsigned int *nrepeat, int *workarray) { int middle; if ((end-start)>1) { middle=start+(end-start)/2; #if 0 printf("For start %d end %d obtained new middle: %d\n",start,end,middle); #endif Ptngc_bwt_merge_sort_inner(indices,nvals,vals, start,middle, nrepeat, workarray); Ptngc_bwt_merge_sort_inner(indices,nvals,vals, middle,end, nrepeat, workarray); #if 0 printf("For start %d end %d Before merge: Comparing element %d with %d\n",start,end,middle-1,middle); #endif if (compare_index(indices[middle-1],indices[middle],nvals,vals,nrepeat)>0) { /* Merge to work array. */ int i, n=end-start; int ileft=start; int iright=middle; for (i=0; i0) { workarray[i]=indices[iright]; iright++; } else { workarray[i]=indices[ileft]; ileft++; } } } /* Copy result back. */ memcpy(indices+start,workarray,(end-start)*sizeof(int)); } } } /* Burrows-Wheeler transform. */ void Ptngc_comp_to_bwt(unsigned int *vals, int nvals, unsigned int *output, int *index) { int i; int *indices=warnmalloc(2*nvals*sizeof *indices); unsigned int *nrepeat=warnmalloc(nvals*sizeof *nrepeat); int *warr=indices+nvals; if (nvals>0xFFFFFF) { fprintf(stderr,"BWT cannot pack more than %d values.\n",0xFFFFFF); exit(1); } /* Also note that repeat pattern k (kmax) cannot be larger than 255. */ #if 0 printf("Size of arrays is %.2f M\n",4*nvals*sizeof *indices/1024./1024.); #endif for (i=0; i=1; k--) { try_next_k: if (k>=1) { #ifdef SHOWIT printf("Trying k=%d at i=%d\n",k,i); #endif for (j=k; jmaxrepeat) new_j=j; if ((new_j>good_j) || ((new_j==good_j) && (knvals) repeat=nvals; nrepeat[i+m]=((unsigned int) (good_k)) | (((unsigned int) (repeat))<<8); } /* If no repetition was found for this value signal that here. */ if (!nrepeat[i]) nrepeat[i+m]=257U; /* This is 1<<8 | 1 */ } } #ifdef SHOWIT for (i=0; i>8)!=1) printf("String repeats at %d: %d %d\n",i,nrepeat[i]>>8,nrepeat[i]&0xFFU); #endif /* Sort cyclic shift matrix. */ Ptngc_bwt_merge_sort_inner(indices,nvals,vals,0,nvals,nrepeat,warr); #if 0 /* Test that it really is sorted. */ for (i=0; i=0; i--) { vals[i]=input[index]; index=p[index]+c[input[index]]; } free(p); free(c); }