From 9ee2ce3c0d78f4ec0c8dee6bba00cac6e8b2f339 Mon Sep 17 00:00:00 2001 From: Hal Finkel <hfinkel@anl.gov> Date: Fri, 28 Sep 2018 16:32:55 -0500 Subject: [PATCH] importing new SZ files --- thirdparty/SZ/COPYRIGHT.txt | 3 +- .../SZ/sz/include/TightDataPointStorageD.h | 6 + .../SZ/sz/include/TightDataPointStorageF.h | 5 + thirdparty/SZ/sz/include/TypeManager.h | 2 + thirdparty/SZ/sz/include/callZlib.h | 2 + thirdparty/SZ/sz/include/dataCompression.h | 12 + thirdparty/SZ/sz/include/pastriD.h | 274 +- thirdparty/SZ/sz/include/pastriF.h | 274 +- thirdparty/SZ/sz/include/sz.h | 27 +- thirdparty/SZ/sz/include/sz_double.h | 6 + thirdparty/SZ/sz/include/sz_double_pwr.h | 4 + thirdparty/SZ/sz/include/sz_float.h | 8 + thirdparty/SZ/sz/include/sz_float_pwr.h | 4 + thirdparty/SZ/sz/include/szd_double.h | 2 + thirdparty/SZ/sz/include/szd_double_pwr.h | 4 + thirdparty/SZ/sz/include/szd_float.h | 5 + thirdparty/SZ/sz/include/szd_float_pwr.h | 3 + thirdparty/SZ/sz/include/utility.h | 43 + thirdparty/SZ/sz/src/ByteToolkit.c | 20 +- thirdparty/SZ/sz/src/DynamicDoubleArray.c | 2 +- thirdparty/SZ/sz/src/DynamicFloatArray.c | 2 +- thirdparty/SZ/sz/src/DynamicIntArray.c | 2 +- thirdparty/SZ/sz/src/Huffman.c | 17 +- thirdparty/SZ/sz/src/TightDataPointStorageD.c | 52 +- thirdparty/SZ/sz/src/TightDataPointStorageF.c | 59 +- thirdparty/SZ/sz/src/TypeManager.c | 70 +- thirdparty/SZ/sz/src/callZlib.c | 22 + thirdparty/SZ/sz/src/conf.c | 55 +- thirdparty/SZ/sz/src/dataCompression.c | 278 +- thirdparty/SZ/sz/src/sz.c | 93 +- thirdparty/SZ/sz/src/sz_double.c | 2121 ++++++++++- thirdparty/SZ/sz/src/sz_double_pwr.c | 186 + thirdparty/SZ/sz/src/sz_double_ts.c | 2 +- thirdparty/SZ/sz/src/sz_float.c | 2875 ++++++++++++++- thirdparty/SZ/sz/src/sz_float_pwr.c | 188 + thirdparty/SZ/sz/src/sz_int16.c | 5 +- thirdparty/SZ/sz/src/sz_int32.c | 5 +- thirdparty/SZ/sz/src/sz_int64.c | 5 +- thirdparty/SZ/sz/src/sz_int8.c | 5 +- thirdparty/SZ/sz/src/sz_uint16.c | 5 +- thirdparty/SZ/sz/src/sz_uint32.c | 5 +- thirdparty/SZ/sz/src/sz_uint64.c | 5 +- thirdparty/SZ/sz/src/sz_uint8.c | 5 +- thirdparty/SZ/sz/src/szd_double.c | 2801 ++++++++++++++- thirdparty/SZ/sz/src/szd_double_pwr.c | 74 + thirdparty/SZ/sz/src/szd_float.c | 3141 ++++++++++++++++- thirdparty/SZ/sz/src/szd_float_pwr.c | 71 + thirdparty/SZ/sz/src/szd_int16.c | 9 +- thirdparty/SZ/sz/src/szd_int32.c | 7 +- thirdparty/SZ/sz/src/szd_int64.c | 7 +- thirdparty/SZ/sz/src/szd_int8.c | 7 +- thirdparty/SZ/sz/src/szd_uint16.c | 7 +- thirdparty/SZ/sz/src/szd_uint32.c | 7 +- thirdparty/SZ/sz/src/szd_uint64.c | 7 +- thirdparty/SZ/sz/src/szd_uint8.c | 7 +- thirdparty/SZ/sz/src/szf.c | 51 +- 56 files changed, 12399 insertions(+), 565 deletions(-) create mode 100644 thirdparty/SZ/sz/include/utility.h diff --git a/thirdparty/SZ/COPYRIGHT.txt b/thirdparty/SZ/COPYRIGHT.txt index de90efe..8baddd9 100644 --- a/thirdparty/SZ/COPYRIGHT.txt +++ b/thirdparty/SZ/COPYRIGHT.txt @@ -1,8 +1,9 @@ Copyright © 2016 , UChicago Argonne, LLC All Rights Reserved -[SZ, Version 1.3] +[SZ, Version 1.4] Sheng Di Dingwen Tao +Xin Liang Franck Cappello Argonne National Laboratory diff --git a/thirdparty/SZ/sz/include/TightDataPointStorageD.h b/thirdparty/SZ/sz/include/TightDataPointStorageD.h index 4fc5be8..0863fb0 100644 --- a/thirdparty/SZ/sz/include/TightDataPointStorageD.h +++ b/thirdparty/SZ/sz/include/TightDataPointStorageD.h @@ -23,6 +23,8 @@ typedef struct TightDataPointStorageD char reqLength; char radExpo; //used to compute reqLength based on segmented precisions in "pw_rel_compression" + double minLogValue; + 
int stateNum; int allNodes; @@ -52,6 +54,10 @@ typedef struct TightDataPointStorageD unsigned char* pwrErrBoundBytes; int pwrErrBoundBytes_size; + + unsigned char* raBytes; + size_t raBytes_size; + } TightDataPointStorageD; void new_TightDataPointStorageD_Empty(TightDataPointStorageD **self); diff --git a/thirdparty/SZ/sz/include/TightDataPointStorageF.h b/thirdparty/SZ/sz/include/TightDataPointStorageF.h index eca1717..7e5df7d 100644 --- a/thirdparty/SZ/sz/include/TightDataPointStorageF.h +++ b/thirdparty/SZ/sz/include/TightDataPointStorageF.h @@ -34,6 +34,8 @@ typedef struct TightDataPointStorageF unsigned char* rtypeArray; size_t rtypeArray_size; + float minLogValue; + unsigned char* typeArray; //its size is dataSeriesLength/4 (or xxx/4+1) size_t typeArray_size; @@ -55,6 +57,9 @@ typedef struct TightDataPointStorageF unsigned char* pwrErrBoundBytes; int pwrErrBoundBytes_size; + unsigned char* raBytes; + size_t raBytes_size; + } TightDataPointStorageF; void new_TightDataPointStorageF_Empty(TightDataPointStorageF **self); diff --git a/thirdparty/SZ/sz/include/TypeManager.h b/thirdparty/SZ/sz/include/TypeManager.h index 6be71f8..7c543d3 100644 --- a/thirdparty/SZ/sz/include/TypeManager.h +++ b/thirdparty/SZ/sz/include/TypeManager.h @@ -19,8 +19,10 @@ extern "C" { //TypeManager.c size_t convertIntArray2ByteArray_fast_1b(unsigned char* intArray, size_t intArrayLength, unsigned char **result); +size_t convertIntArray2ByteArray_fast_1b_to_result(unsigned char* intArray, size_t intArrayLength, unsigned char *result); void convertByteArray2IntArray_fast_1b(size_t intArrayLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray); size_t convertIntArray2ByteArray_fast_2b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result); +size_t convertIntArray2ByteArray_fast_2b_inplace(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char *result); void convertByteArray2IntArray_fast_2b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray); size_t convertIntArray2ByteArray_fast_3b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result); void convertByteArray2IntArray_fast_3b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray); diff --git a/thirdparty/SZ/sz/include/callZlib.h b/thirdparty/SZ/sz/include/callZlib.h index 0622d98..1aede54 100644 --- a/thirdparty/SZ/sz/include/callZlib.h +++ b/thirdparty/SZ/sz/include/callZlib.h @@ -19,6 +19,8 @@ extern "C" { #include <stdio.h> +int isZlibFormat(unsigned char magic1, unsigned char magic2); + //callZlib.c unsigned long zlib_compress(unsigned char* data, unsigned long dataLength, unsigned char** compressBytes, int level); unsigned long zlib_compress2(unsigned char* data, unsigned long dataLength, unsigned char** compressBytes, int level); diff --git a/thirdparty/SZ/sz/include/dataCompression.h b/thirdparty/SZ/sz/include/dataCompression.h index 1eb0f30..6a2da0b 100644 --- a/thirdparty/SZ/sz/include/dataCompression.h +++ b/thirdparty/SZ/sz/include/dataCompression.h @@ -77,6 +77,18 @@ int computeBlockEdgeSize_3D(int segmentSize); int computeBlockEdgeSize_2D(int segmentSize); int initRandomAccessBytes(unsigned char* raBytes); +int generateLossyCoefficients_float(float* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, float* medianValue, float* decData); +int compressExactDataArray_float(float* oriData, double precision, size_t nbEle, unsigned char** leadArray, 
unsigned char** midArray, unsigned char** resiArray, +int reqLength, int reqBytesLength, int resiBitsLength, float medianValue); + +void decompressExactDataArray_float(unsigned char* leadNum, unsigned char* exactMidBytes, unsigned char* residualMidBits, size_t nbEle, int reqLength, float medianValue, float** decData); + +int generateLossyCoefficients_double(double* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, double* medianValue, double* decData); +int compressExactDataArray_double(double* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray, +int reqLength, int reqBytesLength, int resiBitsLength, double medianValue); + +void decompressExactDataArray_double(unsigned char* leadNum, unsigned char* exactMidBytes, unsigned char* residualMidBits, size_t nbEle, int reqLength, double medianValue, double** decData); + #ifdef __cplusplus } #endif diff --git a/thirdparty/SZ/sz/include/pastriD.h b/thirdparty/SZ/sz/include/pastriD.h index 1a881e6..3ee2813 100644 --- a/thirdparty/SZ/sz/include/pastriD.h +++ b/thirdparty/SZ/sz/include/pastriD.h @@ -10,11 +10,11 @@ static inline int64_t pastri_double_quantize(double x, double binSize){ half.d=0.5; - //printf("pastri_double_quantize:\nx=%lf x=0x%lx\n",x,(*((uint64_t *)(&x)))); - //printf("sign(x):0x%lx\n", x); - //printf("0.5:0x%lx\n", (*((uint64_t *)(&half)))); +// //printf("pastri_double_quantize:\nx=%lf x=0x%lx\n",x,(*((uint64_t *)(&x)))); +// //printf("sign(x):0x%lx\n", x); +// //printf("0.5:0x%lx\n", (*((uint64_t *)(&half)))); half.ui64 |= (u1.ui64 & (uint64_t)0x8000000000000000); - //printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half)))); +// //printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half)))); return (int64_t)(x + half.d); } @@ -26,10 +26,10 @@ static inline void pastri_double_PatternMatch(double*data,pastri_params* p,pastr bp->nonZeros=0; int i,sb; for(i=0;i<p->bSize;i++){ - //printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG +// //printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG if(abs_FastD(data[i])>p->usedEb){ bp->nonZeros++; - //if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG + ////if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG } if(abs_FastD(data[i])>absExt){ absExt=abs_FastD(data[i]); @@ -42,40 +42,40 @@ static inline void pastri_double_PatternMatch(double*data,pastri_params* p,pastr double patternExt=data[extIdx]; bp->binSize=2*p->usedEb; - //if(DEBUG){printf("Extremum : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG - //if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG + ////if(DEBUG){printf("Extremum : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG + ////if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG - //if(DEBUG){for(i=0;i<p->sbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_double_quantize(data[patternIdx+i]/binSize) );} }//DEBUG + ////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_double_quantize(data[patternIdx+i]/binSize) );} }//DEBUG //int64_t *patternQ=(int64_t*)(outBuf+15); //Possible Improvement! 
for(i=0;i<p->sbSize;i++){ patternQ[i]=pastri_double_quantize(data[patternIdx+i],bp->binSize); - if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);} + //if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);} } bp->patternBits=bitsNeeded_double((abs_FastD(patternExt)/bp->binSize)+1)+1; bp->scaleBits=bp->patternBits; bp->scalesBinSize=1/(double)(((uint64_t)1<<(bp->scaleBits-1))-1); - //if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG - //if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG - if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG + ////if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG + ////if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG + //if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG //Calculate Scales. //The index part of the input buffer will be reused to hold Scale, Pattern, etc. values. int localExtIdx=extIdx%p->sbSize; //Local extremum index. This is not the actual extremum of the current sb, but rather the index that correspond to the global (block) extremum. //int64_t *scalesQ=(int64_t*)(outBuf+15+p->sbSize*8); //Possible Improvement! int patternExtZero=(patternExt==0); - //if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG + ////if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG for(sb=0;sb<p->sbNum;sb++){ //scales[sb]=data[sb*p->sbSize+localExtIdx]/patternExt; //scales[sb]=patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt; //assert(scales[sb]<=1); scalesQ[sb]=pastri_double_quantize((patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt),bp->scalesBinSize); - if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);} + //if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);} } - //if(DEBUG){for(i=0;i<p->sbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG + ////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG //int64_t *ECQ=(int64_t*)(outBuf+p->bSize*8); //ECQ is written into outBuf, just be careful when handling it. 
@@ -92,7 +92,7 @@ static inline void pastri_double_PatternMatch(double*data,pastri_params* p,pastr double absECQ=abs_FastD(ECQ[_1DIdx]); if(absECQ > bp->ECQExt) bp->ECQExt=absECQ; - //if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG + ////if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG switch (ECQ[_1DIdx]){ case 0: //ECQ0s++; //Currently not needed @@ -117,8 +117,8 @@ static inline void pastri_double_PatternMatch(double*data,pastri_params* p,pastr _1DIdx=sb*p->sbSize+i; double decompressed=scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-ECQ[_1DIdx]*binSize; if(abs_FastD(decompressed-data[_1DIdx])>(p->usedEb)){ - printf("p->usedEb=%.6e\n",p->usedEb); - printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed)); + //printf("p->usedEb=%.6e\n",p->usedEb); + //printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed)); assert(0); } } @@ -174,8 +174,8 @@ static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* //*(uint16_t*)(&outBuf[5])=p->idxOffset[2]; //*(uint16_t*)(&outBuf[7])=p->idxOffset[3]; - if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG - if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG + //if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG + //if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG //**************************************************************************************** //if(0){ //DEBUG @@ -183,8 +183,8 @@ static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* if((UCSparseBytes<UCNonSparseBytes) && (UCSparseBytes<CSparseBytes) && (UCSparseBytes<CNonSparseBytes) ){ //Uncompressed, Sparse bits. Just like the original GAMESS data. Includes: mode, indexOffsets, nonZeros, indexes, data *numOutBytes=UCSparseBytes; - if(D_G){printf("UCSparse\n");} //DEBUG - if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG + //if(D_G){printf("UCSparse\n");} //DEBUG + //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG outBuf[0]=0; //mode //*(uint16_t*)(&outBuf[9])=nonZeros; @@ -216,7 +216,7 @@ static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* } } - if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG + //if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG //**************************************************************************************** //}else if(0){ //DEBUG @@ -224,23 +224,23 @@ static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* }else if((UCNonSparseBytes<UCSparseBytes) && (UCNonSparseBytes<CSparseBytes) && (UCNonSparseBytes<CNonSparseBytes) ){ //Uncompressed, NonSparse bits. 
Includes: mode, indexOffsets, data *numOutBytes=UCNonSparseBytes; - if(D_G){printf("UCNonSparse\n");} //DEBUG - if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG + //if(D_G){printf("UCNonSparse\n");} //DEBUG + //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG outBuf[0]=1; //mode //memcpy(&outBuf[9], &inBuf[p->bSize*8], UCNonSparseBytes-9); memcpy(&outBuf[1], data, p->bSize*p->dataSize); - if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG + //if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG /* for(i=0;i<UCNonSparseBytes-17;i++){ - printf("%d ",inBuf[p->bSize*8+i]); + //printf("%d ",inBuf[p->bSize*8+i]); } - printf("\n"); + //printf("\n"); for(i=0;i<UCNonSparseBytes-17;i++){ - printf("%d ",outBuf[17+i]); + //printf("%d ",outBuf[17+i]); } - printf("\n"); + //printf("\n"); */ //**************************************************************************************** //}else if(1){ //DEBUG @@ -248,9 +248,9 @@ static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* }else if((CSparseBytes<UCNonSparseBytes) && (CSparseBytes<UCSparseBytes) && (CSparseBytes<CNonSparseBytes) ){ //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ} *numOutBytes=CSparseBytes; - if(D_G){printf("CSparse\n");} //DEBUG - if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG - //if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG + //if(D_G){printf("CSparse\n");} //DEBUG + //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG + ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG outBuf[0]=2; //mode ////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes. @@ -269,17 +269,17 @@ static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* //Now, we are at the end of 9th byte. 
bitPos=9*8; - //if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG + ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG for(i=0;i<p->sbSize;i++){ writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point } - //if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG + ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG for(i=0;i<p->sbNum;i++){ writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale } - //if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG - //if(DEBUG)printf("ECQBits:%d\n",ECQBits); + ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG + ////if(DEBUG)printf("ECQBits:%d\n",ECQBits); switch(bp->ECQBits){ case 2: for(i=0;i<p->bSize;i++){ @@ -287,7 +287,7 @@ static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* case 0: break; case 1: - //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); //writeBits_Fast(outBuf,&bitPos,2,0x10); //writeBits_Fast(outBuf,&bitPos,2,0);//0x00 @@ -295,7 +295,7 @@ static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* writeBits_Fast(outBuf,&bitPos,1,0);//0x00 break; case -1: - //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); //writeBits_Fast(outBuf,&bitPos,2,0x11); //writeBits_Fast(outBuf,&bitPos,2,1);//0x01 @@ -314,7 +314,7 @@ static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* case 0: break; case 1: - //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); //writeBits_Fast(outBuf,&bitPos,3,0);//0x000 //writeBits_Fast(outBuf,&bitPos,1,0); @@ -322,7 +322,7 @@ static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* writeBits_Fast(outBuf,&bitPos,1,0); break; case -1: - //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01\n",i,ECQ[i]); //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01\n",i,ECQ[i]); //DEBUG writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); //writeBits_Fast(outBuf,&bitPos,3,1);//0x001 //writeBits_Fast(outBuf,&bitPos,1,0); @@ -330,7 +330,7 @@ static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* writeBits_Fast(outBuf,&bitPos,1,1); break; default: - //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); //writeBits_Fast(outBuf,&bitPos,2+ECQBits,((uint64_t)0x11<<ECQBits)|ECQ[i]); //writeBits_Fast(outBuf,&bitPos,2+ECQBits,(ECQ[i]&((uint64_t)0x00<<ECQBits))|((uint64_t)0x01<<ECQBits)); @@ -343,15 +343,15 @@ static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* break; } - //if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG - if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG + ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG + //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} 
//DEBUG uint32_t bytePos=(bitPos+7)/8; //*(uint32_t*)(&outBuf[9])=bytePos; *(uint32_t*)(&outBuf[1])=bytePos; - if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG + //if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG if(D_G){assert(bitPos==CSparseBits);} //**************************************************************************************** @@ -359,9 +359,9 @@ static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* }else { //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,P, S, {ECQ} *numOutBytes=CNonSparseBytes; - if(D_G){printf("CNonSparse\n");} //DEBUG - if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG - //if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG + //if(D_G){printf("CNonSparse\n");} //DEBUG + //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG + ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG outBuf[0]=3; //mode ////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes. @@ -374,33 +374,33 @@ static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* outBuf[6]=bp->ECQBits; bitPos=7*8; //Currently, we are at the end of 7th byte. - //if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG + ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG for(i=0;i<p->sbSize;i++){ writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point } - //if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG + ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG for(i=0;i<p->sbNum;i++){ writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale } - //if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG - //if(DEBUG)printf("ECQBits:%d\n",ECQBits); + ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG + ////if(DEBUG)printf("ECQBits:%d\n",ECQBits); switch(bp->ECQBits){ case 2: for(i=0;i<p->bSize;i++){ switch(ECQ[i]){ case 0: - //if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG writeBits_Fast(outBuf,&bitPos,1,1);//0x1 break; case 1: - //if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG //writeBits_Fast(outBuf,&bitPos,2,0);//0x00 writeBits_Fast(outBuf,&bitPos,1,0); writeBits_Fast(outBuf,&bitPos,1,0); break; case -1: - //if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG //writeBits_Fast(outBuf,&bitPos,2,2); //0x01 writeBits_Fast(outBuf,&bitPos,1,0); writeBits_Fast(outBuf,&bitPos,1,1); @@ -412,60 +412,60 @@ static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* } break; default: //ECQBits>2 - //if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG + ////if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG for(i=0;i<p->bSize;i++){ - //if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG - //if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); //DEBUG - //if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG + ////if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); //DEBUG + 
////if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG switch(ECQ[i]){ case 0: - //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG - //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG + ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG //temp1=bitPos; writeBits_Fast(outBuf,&bitPos,1,1); //0x1 //wVal=1; writeBits_Fast(outBuf,&bitPos,1,wVal); //0x1 - //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG + ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG break; case 1: - //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x000\n",i,ECQ[i]); //DEBUG - //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x000\n",i,ECQ[i]); //DEBUG + ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG //temp1=bitPos; //writeBits_Fast(outBuf,&bitPos,3,0); //0x000 writeBits_Fast(outBuf,&bitPos,1,0); writeBits_Fast(outBuf,&bitPos,1,0); writeBits_Fast(outBuf,&bitPos,1,0); //wVal=0; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x000 - //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG + ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG break; case -1: - //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG - //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG + ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG //temp1=bitPos; //writeBits_Fast(outBuf,&bitPos,3,8); //0x001 writeBits_Fast(outBuf,&bitPos,1,0); writeBits_Fast(outBuf,&bitPos,1,0); writeBits_Fast(outBuf,&bitPos,1,1); //wVal=8; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x001 - //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG + ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG break; default: - //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG - //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG + ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG //temp1=bitPos; //writeBits_Fast(outBuf,&bitPos,2,2); //0x01 writeBits_Fast(outBuf,&bitPos,1,0); writeBits_Fast(outBuf,&bitPos,1,1); //wVal=2; writeBits_Fast(outBuf,&bitPos,2,wVal); //0x01 writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]); - //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG + ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG break; } } break; } - //if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} 
//DEBUG - if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG + ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG + //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG @@ -473,20 +473,20 @@ static inline void pastri_double_Encode(double *data,int64_t* patternQ,int64_t* //*(uint32_t*)(&outBuf[9])=bytePos; *(uint32_t*)(&outBuf[1])=bytePos; - if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG + //if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG if(D_G){assert(bitPos==CNonSparseBits);} } - //for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG + ////for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG } static inline int pastri_double_Compress(unsigned char*inBuf,pastri_params *p,unsigned char*outBuf,int *numOutBytes){ pastri_blockParams bp; - if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);} //DEBUG - if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);} //DEBUG - if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG - if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG + //if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);} //DEBUG + //if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);} //DEBUG + //if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG + //if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG int64_t patternQ[MAX_PS_SIZE]; int64_t scalesQ[MAX_PS_SIZE]; @@ -566,7 +566,7 @@ static inline void pastri_double_Decode(unsigned char*inBuf,pastri_params *p,pas switch(inBuf[0]){ //R:UCSparse case 0: - if(D_G){printf("\nDC:UCSparse\n");} //DEBUG + //if(D_G){printf("\nDC:UCSparse\n");} //DEBUG //bp->nonZeros=*(uint16_t*)(&inBuf[9]); //bytePos=11; bp->nonZeros=*(uint16_t*)(&inBuf[1]); @@ -591,19 +591,19 @@ static inline void pastri_double_Decode(unsigned char*inBuf,pastri_params *p,pas data[_1DIdx]=*(double*)(&inBuf[bytePos]); bytePos+=8; } - if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG + //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG break; //R:UCNonSparse case 1: - if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG + //if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG //memcpy(&outBuf[p->bSize*8], &inBuf[9], p->bSize*8); memcpy(data, &inBuf[1], p->bSize*8); bytePos=p->bSize*8; - if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG + //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG break; //R:CSparse case 2: - if(D_G){printf("\nDC:CSparse\n");} //DEBUG + //if(D_G){printf("\nDC:CSparse\n");} //DEBUG //for(j=0;j<p->bSize;j++){ // data[j]=0; //} @@ -614,27 +614,27 @@ static inline void pastri_double_Decode(unsigned char*inBuf,pastri_params *p,pas bp->patternBits=inBuf[5]; bp->ECQBits=inBuf[6]; - 
if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG + //if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG //bp->numOutliers=*(uint16_t*)(&inBuf[15]); //bitPos=17*8; bp->numOutliers=*(uint16_t*)(&inBuf[7]); bitPos=9*8; - if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG + //if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG bp->scalesBinSize=1/(double)(((uint64_t)1<<(bp->patternBits-1))-1); bp->binSize=p->usedEb*2; - if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG + //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG for(j=0;j<p->sbSize;j++){ patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point - if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} + //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} } for(j=0;j<p->sbNum;j++){ scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale - if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} + //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} } /* //Splitting @@ -648,13 +648,13 @@ static inline void pastri_double_Decode(unsigned char*inBuf,pastri_params *p,pas switch(bp->ECQBits){ case 2: for(j=0;j<bp->numOutliers;j++){ - //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG - //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); ECQTemp=readBits_I64(inBuf,&bitPos,1); ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; - //if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); + ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); //continue; //sb=_1DIdx/p->sbSize; //localIdx=_1DIdx%p->sbSize; @@ -662,32 +662,32 @@ static inline void pastri_double_Decode(unsigned char*inBuf,pastri_params *p,pas ////data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting ECQ[_1DIdx]=ECQTemp; - //if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG + ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG } break; default: //bp->ECQBits>2 - if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG + //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG for(j=0;j<bp->numOutliers;j++){ _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); //sb=_1DIdx/p->sbSize; //localIdx=_1DIdx%p->sbSize; temp=readBits_UI64(inBuf,&bitPos,1); - //if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG + ////if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG switch(temp){ case 0: //+-1 ECQTemp=readBits_I64(inBuf,&bitPos,1); ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; - //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG - //if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); + ////if(DEBUG){printf("_1DIdx:%ld 
ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG + ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); break; case 1: //Others ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits); - //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG - //if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); + ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG + ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); break; //default: - // printf("ERROR: Bad 2-bit value: 0x%lx",temp); + //// printf("ERROR: Bad 2-bit value: 0x%lx",temp); // assert(0); //AMG // break; } @@ -695,7 +695,7 @@ static inline void pastri_double_Decode(unsigned char*inBuf,pastri_params *p,pas //data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting ECQ[_1DIdx]=ECQTemp; - //if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG + ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG } break; } @@ -704,7 +704,7 @@ static inline void pastri_double_Decode(unsigned char*inBuf,pastri_params *p,pas //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8); bytePos=(bitPos+7)/8; - if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG + //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION) pastri_double_PredictData(p,bp,data,patternQ,scalesQ,ECQ); @@ -712,7 +712,7 @@ static inline void pastri_double_Decode(unsigned char*inBuf,pastri_params *p,pas break; //R:CNonSparse case 3: - if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG + //if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG //for(j=0;j<p->bSize;j++){ // data[j]=0; @@ -724,7 +724,7 @@ static inline void pastri_double_Decode(unsigned char*inBuf,pastri_params *p,pas bp->patternBits=inBuf[5]; bp->ECQBits=inBuf[6]; - if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG + //if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG //bitPos=15*8; bitPos=7*8; @@ -732,27 +732,27 @@ static inline void pastri_double_Decode(unsigned char*inBuf,pastri_params *p,pas bp->scalesBinSize=1/(double)(((uint64_t)1<<(bp->patternBits-1))-1); bp->binSize=p->usedEb*2; - if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG + //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG for(j=0;j<p->sbSize;j++){ patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point - if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} + //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} } for(j=0;j<p->sbNum;j++){ scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale - if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} + //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} } /* //Splitting for(j=0;j<p->bSize;j++){ data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize; - //if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);} + ////if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);} } */ switch(bp->ECQBits){ case 2: for(j=0;j<p->bSize;j++){ - //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG - //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG + 
////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); temp=readBits_UI64(inBuf,&bitPos,1); switch(temp){ @@ -768,7 +768,7 @@ static inline void pastri_double_Decode(unsigned char*inBuf,pastri_params *p,pas break; } - //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG + ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG //continue; //sb=_1DIdx/p->sbSize; //localIdx=_1DIdx%p->sbSize; @@ -776,39 +776,39 @@ static inline void pastri_double_Decode(unsigned char*inBuf,pastri_params *p,pas //data[j]-=ECQTemp*bp->binSize; //Splitting ECQ[j]=ECQTemp; - //if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG + ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG } break; default: //bp->ECQBits>2 - //if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos); + ////if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos); for(j=0;j<p->bSize;j++){ - //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG - //if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos); + ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos); - //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG - //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); temp=readBits_UI64(inBuf,&bitPos,1); - //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG switch(temp){ case 0: - //if(DEBUG)printf("Read:0"); + ////if(DEBUG)printf("Read:0"); temp2=readBits_UI64(inBuf,&bitPos,1); switch(temp2){ case 0: - //if(DEBUG)printf("0"); + ////if(DEBUG)printf("0"); ECQTemp=readBits_I64(inBuf,&bitPos,1); - //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG - //if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp); + ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp); ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; - //if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); + ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); break; case 1: - //if(DEBUG)printf("1\n"); + ////if(DEBUG)printf("1\n"); ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits); - //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG - //if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); + ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); break; default: assert(0); @@ -816,16 +816,16 @@ static inline void pastri_double_Decode(unsigned char*inBuf,pastri_params *p,pas } break; case 1: - //if(DEBUG)printf("Read:1\n"); + ////if(DEBUG)printf("Read:1\n"); ECQTemp=0; - //if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); + 
////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); break; default: assert(0); break; } - //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG + ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG //continue; //sb=_1DIdx/p->sbSize; //localIdx=_1DIdx%p->sbSize; @@ -833,7 +833,7 @@ static inline void pastri_double_Decode(unsigned char*inBuf,pastri_params *p,pas //data[j]-=ECQTemp*bp->binSize; //Splitting ECQ[j]=ECQTemp; - //if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG + ////if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG } break; } @@ -841,7 +841,7 @@ static inline void pastri_double_Decode(unsigned char*inBuf,pastri_params *p,pas //patternQ=(int64_t*)(inBuf+15); //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8); bytePos=(bitPos+7)/8; - if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG + //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION) pastri_double_PredictData(p,bp,data,patternQ,scalesQ,ECQ); @@ -879,19 +879,19 @@ static inline int pastri_double_Check(unsigned char*inBuf,int dataSize,unsigned /* for(i=0;i<p->bSize;i++){ if(idx0[i]!=idx0_dc[i]){ - printf("idx0[%d]=%d != %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i); + //printf("idx0[%d]=%d != %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i); assert(0); } if(idx1[i]!=idx1_dc[i]){ - printf("idx1[%d]=%d != %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i); + //printf("idx1[%d]=%d != %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i); assert(0); } if(idx2[i]!=idx2_dc[i]){ - printf("idx2[%d]=%d != %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i); + //printf("idx2[%d]=%d != %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i); assert(0); } if(idx3[i]!=idx3_dc[i]){ - printf("idx3[%d]=%d != %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i); + //printf("idx3[%d]=%d != %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i); assert(0); } } @@ -900,7 +900,7 @@ static inline int pastri_double_Check(unsigned char*inBuf,int dataSize,unsigned //Comparing Data: for(i=0;i<p->bSize;i++){ if(abs_FastD(data[i]-data_dc[i])>p->usedEb){ - printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb); + //printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb); assert(0); } } diff --git a/thirdparty/SZ/sz/include/pastriF.h b/thirdparty/SZ/sz/include/pastriF.h index 08c9c14..5c1d587 100644 --- a/thirdparty/SZ/sz/include/pastriF.h +++ b/thirdparty/SZ/sz/include/pastriF.h @@ -10,11 +10,11 @@ static inline int64_t pastri_float_quantize(float x, float binSize){ half.d=0.5; - //printf("pastri_float_quantize:\nx=%lf x=0x%lx\n",x,(*((uint64_t *)(&x)))); - //printf("sign(x):0x%lx\n", x); - //printf("0.5:0x%lx\n", (*((uint64_t *)(&half)))); + ////printf("pastri_float_quantize:\nx=%lf x=0x%lx\n",x,(*((uint64_t *)(&x)))); + ////printf("sign(x):0x%lx\n", x); + ////printf("0.5:0x%lx\n", (*((uint64_t *)(&half)))); half.ui64 |= (u1.ui64 & (uint64_t)0x8000000000000000); - //printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half)))); + ////printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half)))); return (int64_t)(x + half.d); } @@ -26,10 +26,10 @@ static inline void pastri_float_PatternMatch(float*data,pastri_params* p,pastri_ bp->nonZeros=0; int i,sb; for(i=0;i<p->bSize;i++){ - //printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG + ////printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG if(abs_FastD(data[i])>p->usedEb){ bp->nonZeros++; - //if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG + 
////if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG } if(abs_FastD(data[i])>absExt){ absExt=abs_FastD(data[i]); @@ -42,40 +42,40 @@ static inline void pastri_float_PatternMatch(float*data,pastri_params* p,pastri_ float patternExt=data[extIdx]; bp->binSize=2*p->usedEb; - //if(DEBUG){printf("Extremum : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG - //if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG + ////if(DEBUG){printf("Extremum : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG + ////if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG - //if(DEBUG){for(i=0;i<p->sbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_float_quantize(data[patternIdx+i]/binSize) );} }//DEBUG + ////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_float_quantize(data[patternIdx+i]/binSize) );} }//DEBUG //int64_t *patternQ=(int64_t*)(outBuf+15); //Possible Improvement! for(i=0;i<p->sbSize;i++){ patternQ[i]=pastri_float_quantize(data[patternIdx+i],bp->binSize); - if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);} + //if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);} } bp->patternBits=bitsNeeded_float((abs_FastD(patternExt)/bp->binSize)+1)+1; bp->scaleBits=bp->patternBits; bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->scaleBits-1))-1); - //if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG - //if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG - if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG + ////if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG + ////if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG + //if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG //Calculate Scales. //The index part of the input buffer will be reused to hold Scale, Pattern, etc. values. int localExtIdx=extIdx%p->sbSize; //Local extremum index. This is not the actual extremum of the current sb, but rather the index that correspond to the global (block) extremum. //int64_t *scalesQ=(int64_t*)(outBuf+15+p->sbSize*8); //Possible Improvement! int patternExtZero=(patternExt==0); - //if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG + ////if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG for(sb=0;sb<p->sbNum;sb++){ //scales[sb]=data[sb*p->sbSize+localExtIdx]/patternExt; //scales[sb]=patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt; //assert(scales[sb]<=1); scalesQ[sb]=pastri_float_quantize((patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt),bp->scalesBinSize); - if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);} + //if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);} } - //if(DEBUG){for(i=0;i<p->sbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG + ////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG //int64_t *ECQ=(int64_t*)(outBuf+p->bSize*8); //ECQ is written into outBuf, just be careful when handling it. 
@@ -92,7 +92,7 @@ static inline void pastri_float_PatternMatch(float*data,pastri_params* p,pastri_ float absECQ=abs_FastD(ECQ[_1DIdx]); if(absECQ > bp->ECQExt) bp->ECQExt=absECQ; - //if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG + ////if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG switch (ECQ[_1DIdx]){ case 0: //ECQ0s++; //Currently not needed @@ -117,8 +117,8 @@ static inline void pastri_float_PatternMatch(float*data,pastri_params* p,pastri_ _1DIdx=sb*p->sbSize+i; float decompressed=scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-ECQ[_1DIdx]*binSize; if(abs_FastD(decompressed-data[_1DIdx])>(p->usedEb)){ - printf("p->usedEb=%.6e\n",p->usedEb); - printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed)); + //printf("p->usedEb=%.6e\n",p->usedEb); + //printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed)); assert(0); } } @@ -174,8 +174,8 @@ static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* sc //*(uint16_t*)(&outBuf[5])=p->idxOffset[2]; //*(uint16_t*)(&outBuf[7])=p->idxOffset[3]; - if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG - if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG + //if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG + //if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG //**************************************************************************************** //if(0){ //DEBUG @@ -183,8 +183,8 @@ static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* sc if((UCSparseBytes<UCNonSparseBytes) && (UCSparseBytes<CSparseBytes) && (UCSparseBytes<CNonSparseBytes) ){ //Uncompressed, Sparse bits. Just like the original GAMESS data. Includes: mode, indexOffsets, nonZeros, indexes, data *numOutBytes=UCSparseBytes; - if(D_G){printf("UCSparse\n");} //DEBUG - if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG + //if(D_G){printf("UCSparse\n");} //DEBUG + //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG outBuf[0]=0; //mode //*(uint16_t*)(&outBuf[9])=nonZeros; @@ -216,7 +216,7 @@ static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* sc } } - if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG + //if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG //**************************************************************************************** //}else if(0){ //DEBUG @@ -224,23 +224,23 @@ static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* sc }else if((UCNonSparseBytes<UCSparseBytes) && (UCNonSparseBytes<CSparseBytes) && (UCNonSparseBytes<CNonSparseBytes) ){ //Uncompressed, NonSparse bits. 
Includes: mode, indexOffsets, data *numOutBytes=UCNonSparseBytes; - if(D_G){printf("UCNonSparse\n");} //DEBUG - if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG + //if(D_G){printf("UCNonSparse\n");} //DEBUG + //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG outBuf[0]=1; //mode //memcpy(&outBuf[9], &inBuf[p->bSize*8], UCNonSparseBytes-9); memcpy(&outBuf[1], data, p->bSize*p->dataSize); - if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG + //if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG /* for(i=0;i<UCNonSparseBytes-17;i++){ - printf("%d ",inBuf[p->bSize*8+i]); + //printf("%d ",inBuf[p->bSize*8+i]); } - printf("\n"); + //printf("\n"); for(i=0;i<UCNonSparseBytes-17;i++){ - printf("%d ",outBuf[17+i]); + //printf("%d ",outBuf[17+i]); } - printf("\n"); + //printf("\n"); */ //**************************************************************************************** //}else if(1){ //DEBUG @@ -248,9 +248,9 @@ static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* sc }else if((CSparseBytes<UCNonSparseBytes) && (CSparseBytes<UCSparseBytes) && (CSparseBytes<CNonSparseBytes) ){ //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ} *numOutBytes=CSparseBytes; - if(D_G){printf("CSparse\n");} //DEBUG - if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG - //if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG + //if(D_G){printf("CSparse\n");} //DEBUG + //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG + ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG outBuf[0]=2; //mode ////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes. @@ -269,17 +269,17 @@ static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* sc //Now, we are at the end of 9th byte. 
bitPos=9*8; - //if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG + ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG for(i=0;i<p->sbSize;i++){ writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point } - //if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG + ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG for(i=0;i<p->sbNum;i++){ writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale } - //if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG - //if(DEBUG)printf("ECQBits:%d\n",ECQBits); + ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG + ////if(DEBUG)printf("ECQBits:%d\n",ECQBits); switch(bp->ECQBits){ case 2: for(i=0;i<p->bSize;i++){ @@ -287,7 +287,7 @@ static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* sc case 0: break; case 1: - //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); //writeBits_Fast(outBuf,&bitPos,2,0x10); //writeBits_Fast(outBuf,&bitPos,2,0);//0x00 @@ -295,7 +295,7 @@ static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* sc writeBits_Fast(outBuf,&bitPos,1,0);//0x00 break; case -1: - //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); //writeBits_Fast(outBuf,&bitPos,2,0x11); //writeBits_Fast(outBuf,&bitPos,2,1);//0x01 @@ -314,7 +314,7 @@ static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* sc case 0: break; case 1: - //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); //writeBits_Fast(outBuf,&bitPos,3,0);//0x000 //writeBits_Fast(outBuf,&bitPos,1,0); @@ -322,7 +322,7 @@ static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* sc writeBits_Fast(outBuf,&bitPos,1,0); break; case -1: - //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01\n",i,ECQ[i]); //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01\n",i,ECQ[i]); //DEBUG writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); //writeBits_Fast(outBuf,&bitPos,3,1);//0x001 //writeBits_Fast(outBuf,&bitPos,1,0); @@ -330,7 +330,7 @@ static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* sc writeBits_Fast(outBuf,&bitPos,1,1); break; default: - //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); //writeBits_Fast(outBuf,&bitPos,2+ECQBits,((uint64_t)0x11<<ECQBits)|ECQ[i]); //writeBits_Fast(outBuf,&bitPos,2+ECQBits,(ECQ[i]&((uint64_t)0x00<<ECQBits))|((uint64_t)0x01<<ECQBits)); @@ -343,15 +343,15 @@ static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* sc break; } - //if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG - if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG + ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG + //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have 
happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG uint32_t bytePos=(bitPos+7)/8; //*(uint32_t*)(&outBuf[9])=bytePos; *(uint32_t*)(&outBuf[1])=bytePos; - if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG + //if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG if(D_G){assert(bitPos==CSparseBits);} //**************************************************************************************** @@ -359,9 +359,9 @@ static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* sc }else { //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,P, S, {ECQ} *numOutBytes=CNonSparseBytes; - if(D_G){printf("CNonSparse\n");} //DEBUG - if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG - //if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG + //if(D_G){printf("CNonSparse\n");} //DEBUG + //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG + ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG outBuf[0]=3; //mode ////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes. @@ -374,33 +374,33 @@ static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* sc outBuf[6]=bp->ECQBits; bitPos=7*8; //Currently, we are at the end of 7th byte. - //if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG + ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG for(i=0;i<p->sbSize;i++){ writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point } - //if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG + ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG for(i=0;i<p->sbNum;i++){ writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale } - //if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG - //if(DEBUG)printf("ECQBits:%d\n",ECQBits); + ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG + ////if(DEBUG)printf("ECQBits:%d\n",ECQBits); switch(bp->ECQBits){ case 2: for(i=0;i<p->bSize;i++){ switch(ECQ[i]){ case 0: - //if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG writeBits_Fast(outBuf,&bitPos,1,1);//0x1 break; case 1: - //if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG //writeBits_Fast(outBuf,&bitPos,2,0);//0x00 writeBits_Fast(outBuf,&bitPos,1,0); writeBits_Fast(outBuf,&bitPos,1,0); break; case -1: - //if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG //writeBits_Fast(outBuf,&bitPos,2,2); //0x01 writeBits_Fast(outBuf,&bitPos,1,0); writeBits_Fast(outBuf,&bitPos,1,1); @@ -412,60 +412,60 @@ static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* sc } break; default: //ECQBits>2 - //if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG + ////if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG for(i=0;i<p->bSize;i++){ - //if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG - //if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); //DEBUG - //if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG + ////if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + 
////if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); //DEBUG + ////if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG switch(ECQ[i]){ case 0: - //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG - //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG + ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG //temp1=bitPos; writeBits_Fast(outBuf,&bitPos,1,1); //0x1 //wVal=1; writeBits_Fast(outBuf,&bitPos,1,wVal); //0x1 - //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG + ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG break; case 1: - //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x000\n",i,ECQ[i]); //DEBUG - //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x000\n",i,ECQ[i]); //DEBUG + ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG //temp1=bitPos; //writeBits_Fast(outBuf,&bitPos,3,0); //0x000 writeBits_Fast(outBuf,&bitPos,1,0); writeBits_Fast(outBuf,&bitPos,1,0); writeBits_Fast(outBuf,&bitPos,1,0); //wVal=0; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x000 - //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG + ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG break; case -1: - //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG - //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG + ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG //temp1=bitPos; //writeBits_Fast(outBuf,&bitPos,3,8); //0x001 writeBits_Fast(outBuf,&bitPos,1,0); writeBits_Fast(outBuf,&bitPos,1,0); writeBits_Fast(outBuf,&bitPos,1,1); //wVal=8; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x001 - //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG + ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG break; default: - //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG - //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG + ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG //temp1=bitPos; //writeBits_Fast(outBuf,&bitPos,2,2); //0x01 writeBits_Fast(outBuf,&bitPos,1,0); writeBits_Fast(outBuf,&bitPos,1,1); //wVal=2; writeBits_Fast(outBuf,&bitPos,2,wVal); //0x01 writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]); - //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG + ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG break; } 
} break; } - //if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG - if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG + ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG + //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG @@ -473,20 +473,20 @@ static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* sc //*(uint32_t*)(&outBuf[9])=bytePos; *(uint32_t*)(&outBuf[1])=bytePos; - if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG + //if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG if(D_G){assert(bitPos==CNonSparseBits);} } - //for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG + ////for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG } static inline int pastri_float_Compress(unsigned char*inBuf,pastri_params *p,unsigned char*outBuf,int *numOutBytes){ pastri_blockParams bp; - if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);} //DEBUG - if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);} //DEBUG - if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG - if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG + //if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);} //DEBUG + //if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);} //DEBUG + //if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG + //if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG int64_t patternQ[MAX_PS_SIZE]; int64_t scalesQ[MAX_PS_SIZE]; @@ -566,7 +566,7 @@ static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,past switch(inBuf[0]){ //R:UCSparse case 0: - if(D_G){printf("\nDC:UCSparse\n");} //DEBUG + //if(D_G){printf("\nDC:UCSparse\n");} //DEBUG //bp->nonZeros=*(uint16_t*)(&inBuf[9]); //bytePos=11; bp->nonZeros=*(uint16_t*)(&inBuf[1]); @@ -591,19 +591,19 @@ static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,past data[_1DIdx]=*(float*)(&inBuf[bytePos]); bytePos+=8; } - if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG + //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG break; //R:UCNonSparse case 1: - if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG + //if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG //memcpy(&outBuf[p->bSize*8], &inBuf[9], p->bSize*8); memcpy(data, &inBuf[1], p->bSize*8); bytePos=p->bSize*8; - if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG + //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG break; //R:CSparse case 2: - if(D_G){printf("\nDC:CSparse\n");} //DEBUG + //if(D_G){printf("\nDC:CSparse\n");} //DEBUG //for(j=0;j<p->bSize;j++){ // data[j]=0; //} @@ -614,27 +614,27 @@ static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,past 
bp->patternBits=inBuf[5]; bp->ECQBits=inBuf[6]; - if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG + //if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG //bp->numOutliers=*(uint16_t*)(&inBuf[15]); //bitPos=17*8; bp->numOutliers=*(uint16_t*)(&inBuf[7]); bitPos=9*8; - if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG + //if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->patternBits-1))-1); bp->binSize=p->usedEb*2; - if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG + //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG for(j=0;j<p->sbSize;j++){ patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point - if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} + //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} } for(j=0;j<p->sbNum;j++){ scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale - if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} + //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} } /* //Splitting @@ -648,13 +648,13 @@ static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,past switch(bp->ECQBits){ case 2: for(j=0;j<bp->numOutliers;j++){ - //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG - //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); ECQTemp=readBits_I64(inBuf,&bitPos,1); ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; - //if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); + ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); //continue; //sb=_1DIdx/p->sbSize; //localIdx=_1DIdx%p->sbSize; @@ -662,32 +662,32 @@ static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,past ////data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting ECQ[_1DIdx]=ECQTemp; - //if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG + ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG } break; default: //bp->ECQBits>2 - if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG + //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG for(j=0;j<bp->numOutliers;j++){ _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); //sb=_1DIdx/p->sbSize; //localIdx=_1DIdx%p->sbSize; temp=readBits_UI64(inBuf,&bitPos,1); - //if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG + ////if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG switch(temp){ case 0: //+-1 ECQTemp=readBits_I64(inBuf,&bitPos,1); ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; - //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG - //if(D_R)printf("R:ECQ[%d]: %ld 
\n",_1DIdx,ECQTemp); + ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG + ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); break; case 1: //Others ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits); - //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG - //if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); + ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG + ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); break; //default: - // printf("ERROR: Bad 2-bit value: 0x%lx",temp); + //// printf("ERROR: Bad 2-bit value: 0x%lx",temp); // assert(0); //AMG // break; } @@ -695,7 +695,7 @@ static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,past //data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting ECQ[_1DIdx]=ECQTemp; - //if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG + ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG } break; } @@ -704,7 +704,7 @@ static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,past //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8); bytePos=(bitPos+7)/8; - if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG + //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION) pastri_float_PredictData(p,bp,data,patternQ,scalesQ,ECQ); @@ -712,7 +712,7 @@ static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,past break; //R:CNonSparse case 3: - if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG + //if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG //for(j=0;j<p->bSize;j++){ // data[j]=0; @@ -724,7 +724,7 @@ static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,past bp->patternBits=inBuf[5]; bp->ECQBits=inBuf[6]; - if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG + //if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG //bitPos=15*8; bitPos=7*8; @@ -732,27 +732,27 @@ static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,past bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->patternBits-1))-1); bp->binSize=p->usedEb*2; - if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG + //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG for(j=0;j<p->sbSize;j++){ patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point - if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} + //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} } for(j=0;j<p->sbNum;j++){ scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale - if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} + //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} } /* //Splitting for(j=0;j<p->bSize;j++){ data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize; - //if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);} + ////if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);} } */ switch(bp->ECQBits){ case 2: for(j=0;j<p->bSize;j++){ - //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG - //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG + 
////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); temp=readBits_UI64(inBuf,&bitPos,1); switch(temp){ @@ -768,7 +768,7 @@ static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,past break; } - //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG + ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG //continue; //sb=_1DIdx/p->sbSize; //localIdx=_1DIdx%p->sbSize; @@ -776,39 +776,39 @@ static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,past //data[j]-=ECQTemp*bp->binSize; //Splitting ECQ[j]=ECQTemp; - //if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG + ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG } break; default: //bp->ECQBits>2 - //if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos); + ////if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos); for(j=0;j<p->bSize;j++){ - //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG - //if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos); + ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos); - //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG - //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG + ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); temp=readBits_UI64(inBuf,&bitPos,1); - //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG switch(temp){ case 0: - //if(DEBUG)printf("Read:0"); + ////if(DEBUG)printf("Read:0"); temp2=readBits_UI64(inBuf,&bitPos,1); switch(temp2){ case 0: - //if(DEBUG)printf("0"); + ////if(DEBUG)printf("0"); ECQTemp=readBits_I64(inBuf,&bitPos,1); - //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG - //if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp); + ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp); ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; - //if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); + ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); break; case 1: - //if(DEBUG)printf("1\n"); + ////if(DEBUG)printf("1\n"); ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits); - //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG - //if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); + ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG + ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); break; default: assert(0); @@ -816,16 +816,16 @@ static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,past } break; case 1: - //if(DEBUG)printf("Read:1\n"); + 
////if(DEBUG)printf("Read:1\n"); ECQTemp=0; - //if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); + ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); break; default: assert(0); break; } - //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG + ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG //continue; //sb=_1DIdx/p->sbSize; //localIdx=_1DIdx%p->sbSize; @@ -833,7 +833,7 @@ static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,past //data[j]-=ECQTemp*bp->binSize; //Splitting ECQ[j]=ECQTemp; - //if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG + ////if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG } break; } @@ -841,7 +841,7 @@ static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,past //patternQ=(int64_t*)(inBuf+15); //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8); bytePos=(bitPos+7)/8; - if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG + //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION) pastri_float_PredictData(p,bp,data,patternQ,scalesQ,ECQ); @@ -879,19 +879,19 @@ static inline int pastri_float_Check(unsigned char*inBuf,int dataSize,unsigned c /* for(i=0;i<p->bSize;i++){ if(idx0[i]!=idx0_dc[i]){ - printf("idx0[%d]=%d != %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i); + //printf("idx0[%d]=%d != %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i); assert(0); } if(idx1[i]!=idx1_dc[i]){ - printf("idx1[%d]=%d != %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i); + //printf("idx1[%d]=%d != %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i); assert(0); } if(idx2[i]!=idx2_dc[i]){ - printf("idx2[%d]=%d != %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i); + //printf("idx2[%d]=%d != %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i); assert(0); } if(idx3[i]!=idx3_dc[i]){ - printf("idx3[%d]=%d != %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i); + //printf("idx3[%d]=%d != %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i); assert(0); } } @@ -900,7 +900,7 @@ static inline int pastri_float_Check(unsigned char*inBuf,int dataSize,unsigned c //Comparing Data: for(i=0;i<p->bSize;i++){ if(abs_FastD(data[i]-data_dc[i])>p->usedEb){ - printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb); + //printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb); assert(0); } } diff --git a/thirdparty/SZ/sz/include/sz.h b/thirdparty/SZ/sz/include/sz.h index 31c3260..42f1fc8 100644 --- a/thirdparty/SZ/sz/include/sz.h +++ b/thirdparty/SZ/sz/include/sz.h @@ -54,6 +54,7 @@ #include "pastri.h" #include "sz_float_ts.h" #include "szd_float_ts.h" +#include "utility.h" #ifdef _WIN32 #define PATH_SEPARATOR ';' @@ -74,11 +75,11 @@ extern "C" { //typedef long int64_t; //typedef unsigned long uint64_t; -#define SZ_VERNUM 0x0140 -#define SZ_VER_MAJOR 1 -#define SZ_VER_MINOR 4 -#define SZ_VER_BUILD 13 -#define SZ_VER_REVISION 5 +#define SZ_VERNUM 0x0200 +#define SZ_VER_MAJOR 2 +#define SZ_VER_MINOR 0 +#define SZ_VER_BUILD 2 +#define SZ_VER_REVISION 0 #define PASTRI 103 #define HZ 102 @@ -130,6 +131,9 @@ extern "C" { #define SZ_DEFAULT_COMPRESSION 2 #define SZ_TEMPORAL_COMPRESSION 3 +#define SZ_NO_REGRESSION 0 +#define SZ_WITH_LINEAR_REGRESSION 1 + #define SZ_PWR_MIN_TYPE 0 #define SZ_PWR_AVG_TYPE 1 #define SZ_PWR_MAX_TYPE 2 @@ -151,6 +155,10 @@ extern "C" { #define MetaDataByteLength 20 #define numOfBufferedSteps 1 //the number of time steps in the buffer + + +#define GZIP_COMPRESSOR 0 
//i.e., ZLIB_COMPRSSOR +#define ZSTD_COMPRESSOR 1 //Note: the following setting should be consistent with stateNum in Huffman.h //#define intvCapacity 65536 @@ -236,6 +244,7 @@ typedef struct sz_params unsigned int quantization_intervals; unsigned int maxRangeRadius; int sol_ID;// it's always SZ, unless the setting is PASTRI compression mode (./configure --enable-pastri) + int losslessCompressor; int sampleDistance; //2 bytes float predThreshold; // 2 bytes int szMode; //* 0 (best speed) or 1 (better compression with Gzip) or 3 temporal-dimension based compression @@ -278,6 +287,10 @@ typedef struct sz_tsc_metainfo int currentStep; char metadata_filename[256]; FILE *metadata_file; + unsigned char* bit_array; //sihuan added + size_t intersect_size; //sihuan added + int64_t* hist_index; //sihuan added: prestep index + } sz_tsc_metadata; extern int versionNumber[4]; @@ -289,6 +302,8 @@ extern int sysEndianType; //*sysEndianType is actually set automatically. extern sz_params *confparams_cpr; extern sz_params *confparams_dec; extern sz_exedata *exe_params; +extern int sz_with_regression; + //------------------------------------------------ extern SZ_VarSet* sz_varset; extern sz_multisteps *multisteps; //compression based on multiple time steps (time-dimension based compression) @@ -356,8 +371,6 @@ void filloutDimArray(size_t* dim, size_t r5, size_t r4, size_t r3, size_t r2, si size_t compute_total_batch_size(); -int isZlibFormat(unsigned char magic1, unsigned char magic2); - void SZ_registerVar(char* varName, int dataType, void* data, int errBoundMode, double absErrBound, double relBoundRatio, double pwRelBoundRatio, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); diff --git a/thirdparty/SZ/sz/include/sz_double.h b/thirdparty/SZ/sz/include/sz_double.h index b186d12..9962366 100644 --- a/thirdparty/SZ/sz/include/sz_double.h +++ b/thirdparty/SZ/sz/include/sz_double.h @@ -75,6 +75,12 @@ size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, siz TightDataPointStorageD* SZ_compress_double_4D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d, size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4); +unsigned int optimize_intervals_double_2D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq); +unsigned int optimize_intervals_double_3D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq); +unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(double *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size); + + #ifdef __cplusplus } #endif diff --git a/thirdparty/SZ/sz/include/sz_double_pwr.h b/thirdparty/SZ/sz/include/sz_double_pwr.h index ce81629..9cb9978 100644 --- a/thirdparty/SZ/sz/include/sz_double_pwr.h +++ b/thirdparty/SZ/sz/include/sz_double_pwr.h @@ -37,6 +37,10 @@ double absErrBound, double relBoundRatio, double pwrErrRatio, double valueRangeS void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(unsigned char** newByteData, double *oriData, size_t dataLength, double absErrBound, double relBoundRatio, double pwrErrRatio, double valueRangeSize, 
double medianValue_f, size_t *outSize); +void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log(unsigned char** newByteData, double *oriData, double globalPrecision, size_t dataLength, size_t *outSize, double min, double max); +void SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log(unsigned char** newByteData, double *oriData, double globalPrecision, size_t r1, size_t r2, size_t *outSize, double min, double max); +void SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(unsigned char** newByteData, double *oriData, double globalPrecision, size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max); + #ifdef __cplusplus } #endif diff --git a/thirdparty/SZ/sz/include/sz_float.h b/thirdparty/SZ/sz/include/sz_float.h index 6ab9231..d08827c 100644 --- a/thirdparty/SZ/sz/include/sz_float.h +++ b/thirdparty/SZ/sz/include/sz_float.h @@ -128,6 +128,14 @@ size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, siz TightDataPointStorageF* SZ_compress_float_4D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f, size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4); + +unsigned int optimize_intervals_float_2D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq); +unsigned int optimize_intervals_float_3D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq); + +unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(float *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size); + #ifdef __cplusplus } #endif diff --git a/thirdparty/SZ/sz/include/sz_float_pwr.h b/thirdparty/SZ/sz/include/sz_float_pwr.h index 3ef0e7c..a2432b3 100644 --- a/thirdparty/SZ/sz/include/sz_float_pwr.h +++ b/thirdparty/SZ/sz/include/sz_float_pwr.h @@ -44,6 +44,10 @@ double absErrBound, double relBoundRatio, double pwrErrRatio, float valueRangeSi void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(unsigned char** newByteData, float *oriData, size_t dataLength, double absErrBound, double relBoundRatio, double pwrErrRatio, float valueRangeSize, float medianValue_f, size_t *outSize); +void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t dataLength, size_t *outSize, float min, float max); +void SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t *outSize, float min, float max); +void SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max); + #ifdef __cplusplus } #endif diff --git a/thirdparty/SZ/sz/include/szd_double.h b/thirdparty/SZ/sz/include/szd_double.h index daf3622..15bb81c 100644 --- a/thirdparty/SZ/sz/include/szd_double.h +++ b/thirdparty/SZ/sz/include/szd_double.h @@ -24,6 +24,8 @@ void 
getSnapshotData_double_1D(double** data, size_t dataSeriesLength, TightData void getSnapshotData_double_2D(double** data, size_t r1, size_t r2, TightDataPointStorageD* tdps, int errBoundMode); void getSnapshotData_double_3D(double** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageD* tdps, int errBoundMode); void getSnapshotData_double_4D(double** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageD* tdps, int errBoundMode); +void decompressDataSeries_double_2D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, unsigned char* comp_data); +void decompressDataSeries_double_3D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data); int SZ_decompress_args_double(double** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize); diff --git a/thirdparty/SZ/sz/include/szd_double_pwr.h b/thirdparty/SZ/sz/include/szd_double_pwr.h index 5d3b257..2756685 100644 --- a/thirdparty/SZ/sz/include/szd_double_pwr.h +++ b/thirdparty/SZ/sz/include/szd_double_pwr.h @@ -21,6 +21,10 @@ double* extractRealPrecision_3D_double(size_t R1, size_t R2, size_t R3, int bloc void decompressDataSeries_double_3D_pwr(double** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageD* tdps); void decompressDataSeries_double_1D_pwrgroup(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps); +void decompressDataSeries_double_1D_pwr_pre_log(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps); +void decompressDataSeries_double_2D_pwr_pre_log(double** data, size_t r1, size_t r2, TightDataPointStorageD* tdps); +void decompressDataSeries_double_3D_pwr_pre_log(double** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageD* tdps); + #ifdef __cplusplus } #endif diff --git a/thirdparty/SZ/sz/include/szd_float.h b/thirdparty/SZ/sz/include/szd_float.h index 8aaf42c..b2168fe 100644 --- a/thirdparty/SZ/sz/include/szd_float.h +++ b/thirdparty/SZ/sz/include/szd_float.h @@ -31,6 +31,11 @@ size_t decompressDataSeries_float_2D_RA_block(float * data, float mean, size_t d int SZ_decompress_args_float(float** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize); size_t decompressDataSeries_float_3D_RA_block(float * data, float mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, int * type, float * unpredictable_data); + +void decompressDataSeries_float_2D_nonblocked_with_blocked_regression(float** data, size_t r1, size_t r2, unsigned char* comp_data); +void decompressDataSeries_float_3D_nonblocked_with_blocked_regression(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data); +void decompressDataSeries_float_3D_random_access_with_blocked_regression(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data); + #ifdef __cplusplus } #endif diff --git a/thirdparty/SZ/sz/include/szd_float_pwr.h b/thirdparty/SZ/sz/include/szd_float_pwr.h index 0907517..7f57b45 100644 --- a/thirdparty/SZ/sz/include/szd_float_pwr.h +++ b/thirdparty/SZ/sz/include/szd_float_pwr.h @@ -22,6 +22,9 @@ void decompressDataSeries_float_3D_pwr(float** data, size_t r1, size_t r2, size_ char* decompressGroupIDArray(unsigned char* bytes, size_t dataLength); void decompressDataSeries_float_1D_pwrgroup(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps); +void decompressDataSeries_float_1D_pwr_pre_log(float** data, 
size_t dataSeriesLength, TightDataPointStorageF* tdps); +void decompressDataSeries_float_2D_pwr_pre_log(float** data, size_t r1, size_t r2, TightDataPointStorageF* tdps); +void decompressDataSeries_float_3D_pwr_pre_log(float** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageF* tdps); #ifdef __cplusplus } diff --git a/thirdparty/SZ/sz/include/utility.h b/thirdparty/SZ/sz/include/utility.h new file mode 100644 index 0000000..b66c141 --- /dev/null +++ b/thirdparty/SZ/sz/include/utility.h @@ -0,0 +1,43 @@ +/** + * @file utility.h + * @author Sheng Di, Sihuan Li + * @date July, 2018 + * @brief Header file for the utility.c. + * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef _UTILITY_H +#define _UTILITY_H + +#include "sz.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//sihuan added: use a assistant struct to do sorting and swap that are easy to implement: should +//consider optimizing the performance later. +typedef struct sort_ast_particle{ + int64_t id; + float var[6]; +} sort_ast_particle; + +int compare_struct(const void* obj1, const void* obj2);//sihuan added: the compare function in the qsort parameter for 2 structures +void reorder_vars(SZ_VarSet* vset);//sihuan added: reorder the variables increasingly by their index +size_t intersectAndsort(int64_t* preIndex, size_t preLen, SZ_VarSet* curVar, size_t dataLen, unsigned char* bitarray); +//sihuan added: find intersection and keep new var sorted by id +void write_reordered_tofile(SZ_VarSet* curVar, size_t dataLen); +//sihuan added: write the reordered input to files for further decompression validation +float calculate_delta_t(size_t size);//sihuan added + +int is_lossless_compressed_data(unsigned char* compressedBytes, size_t cmpSize); +unsigned long sz_lossless_compress(int losslessCompressor, int level, unsigned char* data, unsigned long dataLength, unsigned char** compressBytes); +unsigned long sz_lossless_decompress(int losslessCompressor, unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData, unsigned long targetOriSize); +unsigned long sz_lossless_decompress65536bytes(int losslessCompressor, unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData); + +#ifdef __cplusplus +} +#endif + +#endif /* ----- #ifndef _UTILITY_H ----- */ diff --git a/thirdparty/SZ/sz/src/ByteToolkit.c b/thirdparty/SZ/sz/src/ByteToolkit.c index 31dbf66..43fd811 100644 --- a/thirdparty/SZ/sz/src/ByteToolkit.c +++ b/thirdparty/SZ/sz/src/ByteToolkit.c @@ -430,7 +430,7 @@ long bytesToLong(unsigned char* bytes) } //the byte to input is in the big-endian format -float bytesToFloat(unsigned char* bytes) +inline float bytesToFloat(unsigned char* bytes) { lfloat buf; memcpy(buf.byte, bytes, 4); @@ -439,7 +439,7 @@ float bytesToFloat(unsigned char* bytes) return buf.value; } -void floatToBytes(unsigned char *b, float num) +inline void floatToBytes(unsigned char *b, float num) { lfloat buf; buf.value = num; @@ -449,7 +449,7 @@ void floatToBytes(unsigned char *b, float num) } //the byte to input is in the big-endian format -double bytesToDouble(unsigned char* bytes) +inline double bytesToDouble(unsigned char* bytes) { ldouble buf; memcpy(buf.byte, bytes, 8); @@ -458,7 +458,7 @@ double bytesToDouble(unsigned char* bytes) return buf.value; } -void doubleToBytes(unsigned char *b, double num) +inline void doubleToBytes(unsigned char *b, double num) { ldouble buf; buf.value = num; @@ -507,7 +507,7 @@ int extractBytes(unsigned 
char* byteArray, size_t k, int validLength) return result; } -int getMaskRightCode(int m) { +inline int getMaskRightCode(int m) { switch (m) { case 1: return 0x01; @@ -530,16 +530,16 @@ int getMaskRightCode(int m) { } } -int getLeftMovingCode(int kMod8) +inline int getLeftMovingCode(int kMod8) { return getMaskRightCode(8 - kMod8); } -int getRightMovingSteps(int kMod8, int resiBitLength) { +inline int getRightMovingSteps(int kMod8, int resiBitLength) { return 8 - kMod8 - resiBitLength; } -int getRightMovingCode(int kMod8, int resiBitLength) +inline int getRightMovingCode(int kMod8, int resiBitLength) { int rightMovingSteps = 8 - kMod8 - resiBitLength; if(rightMovingSteps < 0) @@ -814,7 +814,7 @@ void convertULongArrayToBytes(uint64_t* states, size_t stateLength, unsigned cha } -size_t bytesToSize(unsigned char* bytes) +inline size_t bytesToSize(unsigned char* bytes) { size_t result = 0; if(exe_params->SZ_SIZE_TYPE==4) @@ -824,7 +824,7 @@ size_t bytesToSize(unsigned char* bytes) return result; } -void sizeToBytes(unsigned char* outBytes, size_t size) +inline void sizeToBytes(unsigned char* outBytes, size_t size) { if(exe_params->SZ_SIZE_TYPE==4) intToBytes_bigEndian(outBytes, size);//4 diff --git a/thirdparty/SZ/sz/src/DynamicDoubleArray.c b/thirdparty/SZ/sz/src/DynamicDoubleArray.c index 20eb579..54bbb10 100644 --- a/thirdparty/SZ/sz/src/DynamicDoubleArray.c +++ b/thirdparty/SZ/sz/src/DynamicDoubleArray.c @@ -21,7 +21,7 @@ void new_DDA(DynamicDoubleArray **dda, size_t cap) { void convertDDAtoDoubles(DynamicDoubleArray *dba, double **data) { - int size = dba->size; + size_t size = dba->size; if(size>0) *data = (double*)malloc(size * sizeof(double)); else diff --git a/thirdparty/SZ/sz/src/DynamicFloatArray.c b/thirdparty/SZ/sz/src/DynamicFloatArray.c index f775827..1a80a48 100644 --- a/thirdparty/SZ/sz/src/DynamicFloatArray.c +++ b/thirdparty/SZ/sz/src/DynamicFloatArray.c @@ -21,7 +21,7 @@ void new_DFA(DynamicFloatArray **dfa, size_t cap) { void convertDFAtoFloats(DynamicFloatArray *dfa, float **data) { - int size = dfa->size; + size_t size = dfa->size; if(size>0) *data = (float*)malloc(size * sizeof(float)); else diff --git a/thirdparty/SZ/sz/src/DynamicIntArray.c b/thirdparty/SZ/sz/src/DynamicIntArray.c index 3196ab9..9b713aa 100644 --- a/thirdparty/SZ/sz/src/DynamicIntArray.c +++ b/thirdparty/SZ/sz/src/DynamicIntArray.c @@ -21,7 +21,7 @@ void new_DIA(DynamicIntArray **dia, size_t cap) { void convertDIAtoInts(DynamicIntArray *dia, unsigned char **data) { - int size = dia->size; + size_t size = dia->size; if(size>0) *data = (unsigned char*)malloc(size * sizeof(char)); else diff --git a/thirdparty/SZ/sz/src/Huffman.c b/thirdparty/SZ/sz/src/Huffman.c index d067609..6004090 100644 --- a/thirdparty/SZ/sz/src/Huffman.c +++ b/thirdparty/SZ/sz/src/Huffman.c @@ -651,23 +651,26 @@ node reconstruct_HuffTree_from_bytes_anyStates(HuffmanTree *huffmanTree, unsigne void encode_withTree(HuffmanTree* huffmanTree, int *s, size_t length, unsigned char **out, size_t *outSize) { - size_t i, nodeCount = 0; + size_t i; + int nodeCount = 0; unsigned char *treeBytes, buffer[4]; init(huffmanTree, s, length); for (i = 0; i < huffmanTree->stateNum; i++) - if (huffmanTree->code[i]) nodeCount++; + if (huffmanTree->code[i]) nodeCount++; nodeCount = nodeCount*2-1; unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree,nodeCount, &treeBytes); //printf("treeByteSize=%d\n", treeByteSize); *out = (unsigned char*)malloc(length*sizeof(int)+treeByteSize); intToBytes_bigEndian(buffer, nodeCount); memcpy(*out, 
buffer, 4); - memcpy(*out+4, treeBytes, treeByteSize); + intToBytes_bigEndian(buffer, huffmanTree->stateNum/2); //real number of intervals + memcpy(*out+4, buffer, 4); + memcpy(*out+8, treeBytes, treeByteSize); free(treeBytes); size_t enCodeSize = 0; - encode(huffmanTree, s, length, *out+4+treeByteSize, &enCodeSize); - *outSize = 4+treeByteSize+enCodeSize; + encode(huffmanTree, s, length, *out+8+treeByteSize, &enCodeSize); + *outSize = 8+treeByteSize+enCodeSize; //unsigned short state[length]; //decode(*out+4+treeByteSize, enCodeSize, qqq[0], state); @@ -682,7 +685,7 @@ void decode_withTree(HuffmanTree* huffmanTree, unsigned char *s, size_t targetLe { size_t encodeStartIndex; size_t nodeCount = bytesToInt_bigEndian(s); - node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,s+4, nodeCount); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,s+8, nodeCount); //sdi: Debug /* build_code(root, 0, 0, 0); @@ -701,7 +704,7 @@ void decode_withTree(HuffmanTree* huffmanTree, unsigned char *s, size_t targetLe encodeStartIndex = 1+2*nodeCount*sizeof(unsigned short)+nodeCount*sizeof(unsigned char)+nodeCount*sizeof(unsigned int); else encodeStartIndex = 1+3*nodeCount*sizeof(unsigned int)+nodeCount*sizeof(unsigned char); - decode(s+4+encodeStartIndex, targetLength, root, out); + decode(s+8+encodeStartIndex, targetLength, root, out); } void SZ_ReleaseHuffman(HuffmanTree* huffmanTree) diff --git a/thirdparty/SZ/sz/src/TightDataPointStorageD.c b/thirdparty/SZ/sz/src/TightDataPointStorageD.c index 6ece9db..f31bf85 100644 --- a/thirdparty/SZ/sz/src/TightDataPointStorageD.c +++ b/thirdparty/SZ/sz/src/TightDataPointStorageD.c @@ -46,6 +46,10 @@ void new_TightDataPointStorageD_Empty(TightDataPointStorageD **this) (*this)->segment_size = 0; (*this)->pwrErrBoundBytes = NULL; (*this)->pwrErrBoundBytes_size = 0; + + (*this)->raBytes = NULL; + (*this)->raBytes_size = 0; + } int new_TightDataPointStorageD_fromFlatBytes(TightDataPointStorageD **this, unsigned char* flatBytes, size_t flatBytesLength) @@ -83,10 +87,13 @@ int new_TightDataPointStorageD_fromFlatBytes(TightDataPointStorageD **this, unsi sz_params* params = convertBytesToSZParams(&(flatBytes[index])); int mode = confparams_dec->szMode; int predictionMode = confparams_dec->predictionMode; + int losslessCompressor = confparams_dec->losslessCompressor; if(confparams_dec!=NULL) free(confparams_dec); confparams_dec = params; confparams_dec->szMode = mode; + confparams_dec->losslessCompressor = losslessCompressor; + if(mode==SZ_TEMPORAL_COMPRESSION) { confparams_dec->szMode = SZ_TEMPORAL_COMPRESSION; @@ -94,6 +101,8 @@ int new_TightDataPointStorageD_fromFlatBytes(TightDataPointStorageD **this, unsi } index += MetaDataByteLength; + int isRandomAccess = (sameRByte >> 7) & 0x01; + unsigned char dsLengthBytes[8]; for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) dsLengthBytes[i] = flatBytes[index++]; @@ -121,6 +130,13 @@ int new_TightDataPointStorageD_fromFlatBytes(TightDataPointStorageD **this, unsi else (*this)->allSameData = 0; + if(isRandomAccess == 1) + { + (*this)->raBytes_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE; + (*this)->raBytes = &(flatBytes[index]); + return errorBoundMode; + } + int rtype_ = sameRByte & 0x08; //1000 unsigned char byteBuf[8]; @@ -204,12 +220,16 @@ int new_TightDataPointStorageD_fromFlatBytes(TightDataPointStorageD **this, unsi { (*this)->leadNumArray_size = (logicLeadNumBitsNum >> 3) + 1; } + + int minLogValueSize = 0; + if(errorBoundMode>=PW_REL) + minLogValueSize = 8; if 
((*this)->rtypeArray != NULL) { (*this)->residualMidBits_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE - 4 - radExpoL - segmentL - pwrErrBoundBytesL - 4 - 8 - 1 - 8 - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - 8 - (*this)->rtypeArray_size - - (*this)->typeArray_size - (*this)->leadNumArray_size + - minLogValueSize - (*this)->typeArray_size - (*this)->leadNumArray_size - (*this)->exactMidBytes_size - pwrErrBoundBytes_size; for (i = 0; i < (*this)->rtypeArray_size; i++) (*this)->rtypeArray[i] = flatBytes[index++]; @@ -217,10 +237,15 @@ int new_TightDataPointStorageD_fromFlatBytes(TightDataPointStorageD **this, unsi else { (*this)->residualMidBits_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE - 4 - radExpoL - segmentL - pwrErrBoundBytesL - 4 - 8 - 1 - 8 - - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - (*this)->typeArray_size + - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - minLogValueSize - (*this)->typeArray_size - (*this)->leadNumArray_size - (*this)->exactMidBytes_size - pwrErrBoundBytes_size; } + if(errorBoundMode >= PW_REL){ + (*this)->minLogValue = bytesToDouble(&flatBytes[index]); + index+=8; + } + (*this)->typeArray = &flatBytes[index]; //retrieve the number of states (i.e., stateNum) (*this)->allNodes = bytesToInt_bigEndian((*this)->typeArray); //the first 4 bytes store the stateNum @@ -422,6 +447,13 @@ void convertTDPStoBytes_double(TightDataPointStorageD* tdps, unsigned char* byte for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++)//ST bytes[k++] = exactMidBytesLength[i]; + if(confparams_cpr->errorBoundMode>=PW_REL) + { + doubleToBytes(exactMidBytesLength, tdps->minLogValue); + for(i = 0;i < 8; i++) + bytes[k++] = exactMidBytesLength[i]; + } + memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size); k += tdps->typeArray_size; if(confparams_cpr->errorBoundMode>=PW_REL) @@ -521,6 +553,14 @@ void convertTDPStoBytes_double_reserve(TightDataPointStorageD* tdps, unsigned ch memcpy(&(bytes[k]), tdps->rtypeArray, tdps->rtypeArray_size); k += tdps->rtypeArray_size; + + if(confparams_cpr->errorBoundMode>=PW_REL) + { + doubleToBytes(exactMidBytesLength, tdps->minLogValue); + for(i = 0;i < 8; i++) + bytes[k++] = exactMidBytesLength[i]; + } + memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size); k += tdps->typeArray_size; if(confparams_cpr->errorBoundMode>=PW_REL) @@ -583,15 +623,19 @@ void convertTDPStoFlatBytes_double(TightDataPointStorageD *tdps, unsigned char** { size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 0 : tdps->residualMidBits_size; size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0; + + int minLogValueSize = 0; if(confparams_cpr->errorBoundMode>=PW_REL) { segmentL = exe_params->SZ_SIZE_TYPE; radExpoL = 1; pwrBoundArrayL = 4; + minLogValueSize = 8; } size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 8 + 1 + 8 + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + + minLogValueSize /*max absolute log value*/ + tdps->typeArray_size + tdps->leadNumArray_size + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size; @@ -605,16 +649,18 @@ void convertTDPStoFlatBytes_double(TightDataPointStorageD *tdps, unsigned char** { size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 
0 : tdps->residualMidBits_size; size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0; + int minLogValueSize = 0; if(confparams_cpr->errorBoundMode>=PW_REL) { segmentL = exe_params->SZ_SIZE_TYPE; radExpoL = 1; pwrBoundArrayL = 4; + minLogValueSize = 8; } size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 8 + 1 + 8 + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + 8 + tdps->rtypeArray_size - + tdps->typeArray_size + tdps->leadNumArray_size + + minLogValueSize + tdps->typeArray_size + tdps->leadNumArray_size + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size; sameByte = (unsigned char) (sameByte | 0x08); // 00001000, the 4th bit diff --git a/thirdparty/SZ/sz/src/TightDataPointStorageF.c b/thirdparty/SZ/sz/src/TightDataPointStorageF.c index 23a69b8..a30f8d9 100644 --- a/thirdparty/SZ/sz/src/TightDataPointStorageF.c +++ b/thirdparty/SZ/sz/src/TightDataPointStorageF.c @@ -46,6 +46,9 @@ void new_TightDataPointStorageF_Empty(TightDataPointStorageF **this) (*this)->segment_size = 0; (*this)->pwrErrBoundBytes = NULL; (*this)->pwrErrBoundBytes_size = 0; + + (*this)->raBytes = NULL; + (*this)->raBytes_size = 0; } int new_TightDataPointStorageF_fromFlatBytes(TightDataPointStorageF **this, unsigned char* flatBytes, size_t flatBytesLength) @@ -81,10 +84,13 @@ int new_TightDataPointStorageF_fromFlatBytes(TightDataPointStorageF **this, unsi sz_params* params = convertBytesToSZParams(&(flatBytes[index])); int mode = confparams_dec->szMode; int predictionMode = confparams_dec->predictionMode; + int losslessCompressor = confparams_dec->losslessCompressor; if(confparams_dec!=NULL) free(confparams_dec); confparams_dec = params; confparams_dec->szMode = mode; + confparams_dec->losslessCompressor = losslessCompressor; + if(mode==SZ_TEMPORAL_COMPRESSION) { confparams_dec->szMode = SZ_TEMPORAL_COMPRESSION; @@ -92,6 +98,8 @@ int new_TightDataPointStorageF_fromFlatBytes(TightDataPointStorageF **this, unsi } index += MetaDataByteLength; + + int isRandomAccess = (sameRByte >> 7) & 0x01; unsigned char dsLengthBytes[8]; for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) @@ -117,6 +125,12 @@ int new_TightDataPointStorageF_fromFlatBytes(TightDataPointStorageF **this, unsi } else (*this)->allSameData = 0; + if(isRandomAccess == 1) + { + (*this)->raBytes_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE; + (*this)->raBytes = &(flatBytes[index]); + return errorBoundMode; + } int rtype_ = sameRByte & 0x08; //=00001000 unsigned char byteBuf[8]; @@ -165,7 +179,7 @@ int new_TightDataPointStorageF_fromFlatBytes(TightDataPointStorageF **this, unsi { for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++) byteBuf[i] = flatBytes[index++]; - (*this)->rtypeArray_size = bytesToSize(byteBuf);//(ST) + (*this)->rtypeArray_size = bytesToSize(byteBuf);//(ST) } else (*this)->rtypeArray_size = 0; @@ -199,11 +213,15 @@ int new_TightDataPointStorageF_fromFlatBytes(TightDataPointStorageF **this, unsi (*this)->leadNumArray_size = (logicLeadNumBitsNum >> 3) + 1; } + int minLogValueSize = 0; + if(errorBoundMode>=PW_REL) + minLogValueSize = 4; + if ((*this)->rtypeArray != NULL) { (*this)->residualMidBits_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE - 4 - radExpoL - segmentL - pwrErrBoundBytesL - 4 - 4 - 1 - 8 - - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - 4 - 
(*this)->rtypeArray_size - - (*this)->typeArray_size - (*this)->leadNumArray_size + - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - minLogValueSize - exe_params->SZ_SIZE_TYPE - 4 - (*this)->rtypeArray_size + - minLogValueSize - (*this)->typeArray_size - (*this)->leadNumArray_size - (*this)->exactMidBytes_size - pwrErrBoundBytes_size; for (i = 0; i < (*this)->rtypeArray_size; i++) (*this)->rtypeArray[i] = flatBytes[index++]; @@ -211,9 +229,15 @@ int new_TightDataPointStorageF_fromFlatBytes(TightDataPointStorageF **this, unsi else { (*this)->residualMidBits_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE - 4 - radExpoL - segmentL - pwrErrBoundBytesL - 4 - 4 - 1 - 8 - - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - (*this)->typeArray_size + - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - minLogValueSize - (*this)->typeArray_size - (*this)->leadNumArray_size - (*this)->exactMidBytes_size - pwrErrBoundBytes_size; - } + } + + if(errorBoundMode>=PW_REL) + { + (*this)->minLogValue = bytesToFloat(&flatBytes[index]); + index+=4; + } (*this)->typeArray = &flatBytes[index]; //retrieve the number of states (i.e., stateNum) @@ -418,6 +442,13 @@ void convertTDPStoBytes_float(TightDataPointStorageF* tdps, unsigned char* bytes for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++)//ST bytes[k++] = exactMidBytesLength[i]; + if(confparams_cpr->errorBoundMode>=PW_REL) + { + floatToBytes(exactMidBytesLength, tdps->minLogValue); + for(i=0;i<4;i++) + bytes[k++] = exactMidBytesLength[i]; + } + memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size); k += tdps->typeArray_size; if(confparams_cpr->errorBoundMode>=PW_REL) @@ -519,6 +550,14 @@ void convertTDPStoBytes_float_reserve(TightDataPointStorageF* tdps, unsigned cha memcpy(&(bytes[k]), tdps->rtypeArray, tdps->rtypeArray_size); k += tdps->rtypeArray_size; + + if(confparams_cpr->errorBoundMode>=PW_REL) + { + floatToBytes(exactMidBytesLength, tdps->minLogValue); + for(i=0;i<4;i++) + bytes[k++] = exactMidBytesLength[i]; + } + memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size); k += tdps->typeArray_size; if(confparams_cpr->errorBoundMode>=PW_REL) @@ -581,15 +620,17 @@ void convertTDPStoFlatBytes_float(TightDataPointStorageF *tdps, unsigned char** { size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 0 : tdps->residualMidBits_size; size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0; + int minLogValueSize = 0; if(confparams_cpr->errorBoundMode>=PW_REL) { segmentL = exe_params->SZ_SIZE_TYPE; radExpoL = 1; pwrBoundArrayL = 4; + minLogValueSize = 4; } size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 4 + 1 + 8 - + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + minLogValueSize + tdps->typeArray_size + tdps->leadNumArray_size + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size; @@ -603,16 +644,18 @@ void convertTDPStoFlatBytes_float(TightDataPointStorageF *tdps, unsigned char** { size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 
0 : tdps->residualMidBits_size; size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0; + int minLogValueSize = 0; if(confparams_cpr->errorBoundMode>=PW_REL) { segmentL = exe_params->SZ_SIZE_TYPE; radExpoL = 1; pwrBoundArrayL = 4; + minLogValueSize = 4; } size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 4 + 1 + 8 + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + 4 + tdps->rtypeArray_size - + tdps->typeArray_size + tdps->leadNumArray_size + + minLogValueSize + tdps->typeArray_size + tdps->leadNumArray_size + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size; sameByte = (unsigned char) (sameByte | 0x08); // 00001000, the 4th bit @@ -721,7 +764,7 @@ void convertTDPStoFlatBytes_float_args(TightDataPointStorageF *tdps, unsigned ch * to free the memory used in the compression * */ void free_TightDataPointStorageF(TightDataPointStorageF *tdps) -{ +{ if(tdps->rtypeArray!=NULL) free(tdps->rtypeArray); if(tdps->typeArray!=NULL) diff --git a/thirdparty/SZ/sz/src/TypeManager.c b/thirdparty/SZ/sz/src/TypeManager.c index 42474fb..638f3cb 100644 --- a/thirdparty/SZ/sz/src/TypeManager.c +++ b/thirdparty/SZ/sz/src/TypeManager.c @@ -43,7 +43,33 @@ size_t convertIntArray2ByteArray_fast_1b(unsigned char* intArray, size_t intArra } return byteLength; } - + +size_t convertIntArray2ByteArray_fast_1b_to_result(unsigned char* intArray, size_t intArrayLength, unsigned char *result) +{ + size_t byteLength = 0; + size_t i, j; + if(intArrayLength%8==0) + byteLength = intArrayLength/8; + else + byteLength = intArrayLength/8+1; + + size_t n = 0; + int tmp, type; + for(i = 0;i<byteLength;i++) + { + tmp = 0; + for(j = 0;j<8&&n<intArrayLength;j++) + { + type = intArray[n]; + if(type == 1) + tmp = (tmp | (1 << (7-j))); + n++; + } + result[i] = (unsigned char)tmp; + } + return byteLength; +} + void convertByteArray2IntArray_fast_1b(size_t intArrayLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray) { if(intArrayLength > byteArrayLength*8) @@ -148,6 +174,46 @@ size_t convertIntArray2ByteArray_fast_2b(unsigned char* timeStepType, size_t tim return byteLength; } +size_t convertIntArray2ByteArray_fast_2b_inplace(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char *result) +{ + size_t i, j, byteLength = 0; + if(timeStepTypeLength%4==0) + byteLength = timeStepTypeLength*2/8; + else + byteLength = timeStepTypeLength*2/8+1; + + size_t n = 0; + for(i = 0;i<byteLength;i++) + { + int tmp = 0; + for(j = 0;j<4&&n<timeStepTypeLength;j++) + { + int type = timeStepType[n]; + switch(type) + { + case 0: + + break; + case 1: + tmp = (tmp | (1 << (6-j*2))); + break; + case 2: + tmp = (tmp | (2 << (6-j*2))); + break; + case 3: + tmp = (tmp | (3 << (6-j*2))); + break; + default: + printf("Error: wrong timestep type...: type[%zu]=%d\n", n, type); + exit(0); + } + n++; + } + result[i] = (unsigned char)tmp; + } + return byteLength; +} + void convertByteArray2IntArray_fast_2b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray) { if(stepLength > byteArrayLength*4) @@ -291,7 +357,7 @@ void convertByteArray2IntArray_fast_3b(size_t stepLength, unsigned char* byteArr } } -int getLeftMovingSteps(size_t k, unsigned char resiBitLength) +inline int getLeftMovingSteps(size_t k, unsigned char resiBitLength) { return 8 - k%8 - resiBitLength; } diff --git a/thirdparty/SZ/sz/src/callZlib.c 
b/thirdparty/SZ/sz/src/callZlib.c index 0e392b7..4e4bb6f 100644 --- a/thirdparty/SZ/sz/src/callZlib.c +++ b/thirdparty/SZ/sz/src/callZlib.c @@ -27,6 +27,25 @@ } \ } +int isZlibFormat(unsigned char magic1, unsigned char magic2) +{ + if(magic1==104&&magic2==5) //DC+BS + return 1; + if(magic1==104&&magic2==129) //DC+DC + return 1; + if(magic1==104&&magic2==222) //DC+BC + return 1; + if(magic1==120&&magic2==1) //BC+BS + return 1; + if(magic1==120&&magic2==94) //BC+? + return 1; + if(magic1==120&&magic2==156) //BC+DC + return 1; + if(magic1==120&&magic2==218) //BC+BS + return 1; + return 0; +} + /*zlib_compress() is only valid for median-size data compression. */ unsigned long zlib_compress(unsigned char* data, unsigned long dataLength, unsigned char** compressBytes, int level) { @@ -195,6 +214,9 @@ unsigned long zlib_compress5(unsigned char* data, unsigned long dataLength, unsi strm.zfree = Z_NULL; strm.opaque = Z_NULL; ret = deflateInit(&strm, level); + //int windowBits = 15; + //ret = deflateInit2(&strm, level, Z_DEFLATED, windowBits, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);//Z_FIXED); //Z_DEFAULT_STRATEGY + if (ret != Z_OK) return ret; diff --git a/thirdparty/SZ/sz/src/conf.c b/thirdparty/SZ/sz/src/conf.c index 8e6959d..cc5ccf3 100644 --- a/thirdparty/SZ/sz/src/conf.c +++ b/thirdparty/SZ/sz/src/conf.c @@ -102,11 +102,16 @@ int SZ_ReadConf(const char* sz_cfgFile) { confparams_cpr->sampleDistance = 100; confparams_cpr->szMode = SZ_BEST_COMPRESSION; - - confparams_cpr->gzipMode = 1; //fast mode + confparams_cpr->losslessCompressor = ZSTD_COMPRESSOR; //other option: GZIP_COMPRESSOR; + if(confparams_cpr->losslessCompressor==ZSTD_COMPRESSOR) + confparams_cpr->gzipMode = 3; //fast mode + else + confparams_cpr->gzipMode = 1; //high speed mode confparams_cpr->errorBoundMode = PSNR; confparams_cpr->psnr = 90; + confparams_cpr->absErrBound = 1E-4; + confparams_cpr->relBoundRatio = 1E-4; confparams_cpr->pw_relBoundRatio = 1E-3; confparams_cpr->segment_size = 36; @@ -114,6 +119,8 @@ int SZ_ReadConf(const char* sz_cfgFile) { confparams_cpr->pwr_type = SZ_PWR_MIN_TYPE; confparams_cpr->snapshotCmprStep = 5; + + sz_with_regression = SZ_WITH_LINEAR_REGRESSION; return SZ_SCES; } @@ -212,7 +219,26 @@ int SZ_ReadConf(const char* sz_cfgFile) { return SZ_NSCS; } - modeBuf = iniparser_getstring(ini, "PARAMETER:gzipMode", NULL); + modeBuf = iniparser_getstring(ini, "PARAMETER:losslessCompressor", "ZSTD_COMPRESSOR"); + if(strcmp(modeBuf, "GZIP_COMPRESSOR")==0) + confparams_cpr->losslessCompressor = GZIP_COMPRESSOR; + else if(strcmp(modeBuf, "ZSTD_COMPRESSOR")==0) + confparams_cpr->losslessCompressor = ZSTD_COMPRESSOR; + else + { + printf("[SZ] Error: Wrong losslessCompressor setting (please check sz.config file)\n");\ + printf("No Such a lossless compressor: %s\n", modeBuf); + iniparser_freedict(ini); + return SZ_NSCS; + } + + modeBuf = iniparser_getstring(ini, "PARAMETER:withLinearRegression", "YES"); + if(strcmp(modeBuf, "YES")==0 || strcmp(modeBuf, "yes")==0) + sz_with_regression = SZ_WITH_LINEAR_REGRESSION; + else + sz_with_regression = SZ_NO_REGRESSION; + + modeBuf = iniparser_getstring(ini, "PARAMETER:gzipMode", "Gzip_BEST_SPEED"); if(modeBuf==NULL) { printf("[SZ] Error: Null Gzip mode setting (please check sz.config file)\n"); @@ -233,6 +259,29 @@ int SZ_ReadConf(const char* sz_cfgFile) { return SZ_NSCS; } + modeBuf = iniparser_getstring(ini, "PARAMETER:zstdMode", "Zstd_HIGH_SPEED"); + if(modeBuf==NULL) + { + printf("[SZ] Error: Null Zstd mode setting (please check sz.config file)\n"); + iniparser_freedict(ini); + 
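+ //no zstdMode string could be read; abort configuration loading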
return SZ_NSCS; + } + else if(strcmp(modeBuf, "Zstd_BEST_SPEED")==0) + confparams_cpr->gzipMode = 1; + else if(strcmp(modeBuf, "Zstd_HIGH_SPEED")==0) + confparams_cpr->gzipMode = 3; + else if(strcmp(modeBuf, "Zstd_HIGH_COMPRESSION")==0) + confparams_cpr->gzipMode = 19; + else if(strcmp(modeBuf, "Zstd_BEST_COMPRESSION")==0) + confparams_cpr->gzipMode = 22; + else if(strcmp(modeBuf, "Zstd_DEFAULT_COMPRESSION")==0) + confparams_cpr->gzipMode = 3; + else + { + printf("[SZ] Error: Wrong zstd Mode (please check sz.config file)\n"); + return SZ_NSCS; + } + //TODO confparams_cpr->snapshotCmprStep = (int)iniparser_getint(ini, "PARAMETER:snapshotCmprStep", 5); diff --git a/thirdparty/SZ/sz/src/dataCompression.c b/thirdparty/SZ/sz/src/dataCompression.c index 0bb5ce1..212a104 100644 --- a/thirdparty/SZ/sz/src/dataCompression.c +++ b/thirdparty/SZ/sz/src/dataCompression.c @@ -66,14 +66,14 @@ long computeRangeSize_int(void* oriData, int dataType, size_t size, int64_t* val else if(dataType == SZ_UINT32) { unsigned int* data = (unsigned int*)oriData; - int data_; + unsigned int data_; min = data[0], max = min; computeMinMax(data); } else if(dataType == SZ_INT32) { int* data = (int*)oriData; - unsigned int data_; + int data_; min = data[0], max = min; computeMinMax(data); } @@ -595,3 +595,277 @@ int initRandomAccessBytes(unsigned char* raBytes) return k; } + +//The following functions are float-precision version of dealing with the unpredictable data points +int generateLossyCoefficients_float(float* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, float* medianValue, float* decData) +{ + float valueRangeSize; + + computeRangeSize_float(oriData, nbEle, &valueRangeSize, medianValue); + short radExpo = getExponent_float(valueRangeSize/2); + + int reqLength; + computeReqLength_float(precision, radExpo, &reqLength, medianValue); + + *reqBytesLength = reqLength/8; + *resiBitsLength = reqLength%8; + + size_t i = 0; + for(i = 0;i < nbEle;i++) + { + float normValue = oriData[i] - *medianValue; + + lfloat lfBuf; + lfBuf.value = normValue; + + int ignBytesLength = 32 - reqLength; + if(ignBytesLength<0) + ignBytesLength = 0; + + lfBuf.ivalue = (lfBuf.ivalue >> ignBytesLength) << ignBytesLength; + + //float tmpValue = lfBuf.value; + + decData[i] = lfBuf.value + *medianValue; + } + return reqLength; +} + +/** + * @param float* oriData: inplace argument (input / output) + * + * */ +int compressExactDataArray_float(float* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray, +int reqLength, int reqBytesLength, int resiBitsLength, float medianValue) +{ + //allocate memory for coefficient compression arrays + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + unsigned char preDataBytes[4] = {0,0,0,0}; + + //allocate memory for vce and lce + FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + size_t i = 0; + for(i = 0;i < nbEle;i++) + { + compressSingleFloatValue(vce, oriData[i], precision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + 
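+ //keep this value's 4 compressed bytes as the reference, so the next iteration can count how many leading bytes it shares with its predecessor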
memcpy(preDataBytes,vce->curBytes,4); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + oriData[i] = vce->data; + } + convertDIAtoInts(exactLeadNumArray, leadArray); + convertDBAtoBytes(exactMidByteArray,midArray); + convertDIAtoInts(resiBitArray, resiArray); + + size_t midArraySize = exactMidByteArray->size; + + free(vce); + free(lce); + + free_DIA(exactLeadNumArray); + free_DBA(exactMidByteArray); + free_DIA(resiBitArray); + + return midArraySize; +} + +void decompressExactDataArray_float(unsigned char* leadNum, unsigned char* exactMidBytes, unsigned char* residualMidBits, size_t nbEle, int reqLength, float medianValue, float** decData) +{ + *decData = (float*)malloc(nbEle*sizeof(float)); + size_t i = 0, j = 0, k = 0, l = 0, p = 0, curByteIndex = 0; + float exactData = 0; + unsigned char preBytes[4] = {0,0,0,0}; + unsigned char curBytes[4]; + int resiBits; + unsigned char leadingNum; + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + for(i = 0; i<nbEle;i++) + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 4); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToFloat(curBytes); + (*decData)[i] = exactData + medianValue; + memcpy(preBytes,curBytes,4); + } +} + +//double-precision version of dealing with unpredictable data points in sz 2.0 +int generateLossyCoefficients_double(double* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, double* medianValue, double* decData) +{ + double valueRangeSize; + + computeRangeSize_double(oriData, nbEle, &valueRangeSize, medianValue); + short radExpo = getExponent_double(valueRangeSize/2); + + int reqLength; + computeReqLength_double(precision, radExpo, &reqLength, medianValue); + + *reqBytesLength = reqLength/8; + *resiBitsLength = reqLength%8; + + size_t i = 0; + for(i = 0;i < nbEle;i++) + { + double normValue = oriData[i] - *medianValue; + + ldouble ldBuf; + ldBuf.value = normValue; + + int ignBytesLength = 64 - reqLength; + if(ignBytesLength<0) + ignBytesLength = 0; + + ldBuf.lvalue = (ldBuf.lvalue >> ignBytesLength) << ignBytesLength; + + decData[i] = ldBuf.value + *medianValue; + } + return reqLength; +} + +/** + * @param double* oriData: inplace argument (input / output) + * + * */ +int compressExactDataArray_double(double* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray, +int reqLength, int reqBytesLength, 
int resiBitsLength, double medianValue) +{ + //allocate memory for coefficient compression arrays + DynamicIntArray *exactLeadNumArray; + new_DIA(&exactLeadNumArray, DynArrayInitLen); + DynamicByteArray *exactMidByteArray; + new_DBA(&exactMidByteArray, DynArrayInitLen); + DynamicIntArray *resiBitArray; + new_DIA(&resiBitArray, DynArrayInitLen); + unsigned char preDataBytes[8] = {0,0,0,0,0,0,0,0}; + + //allocate memory for vce and lce + DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement)); + LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); + + size_t i = 0; + for(i = 0;i < nbEle;i++) + { + compressSingleDoubleValue(vce, oriData[i], precision, medianValue, reqLength, reqBytesLength, resiBitsLength); + updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); + memcpy(preDataBytes,vce->curBytes,8); + addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); + oriData[i] = vce->data; + } + convertDIAtoInts(exactLeadNumArray, leadArray); + convertDBAtoBytes(exactMidByteArray,midArray); + convertDIAtoInts(resiBitArray, resiArray); + + size_t midArraySize = exactMidByteArray->size; + + free(vce); + free(lce); + + free_DIA(exactLeadNumArray); + free_DBA(exactMidByteArray); + free_DIA(resiBitArray); + + return midArraySize; +} + +void decompressExactDataArray_double(unsigned char* leadNum, unsigned char* exactMidBytes, unsigned char* residualMidBits, size_t nbEle, int reqLength, double medianValue, double** decData) +{ + *decData = (double*)malloc(nbEle*sizeof(double)); + size_t i = 0, j = 0, k = 0, l = 0, p = 0, curByteIndex = 0; + double exactData = 0; + unsigned char preBytes[8] = {0,0,0,0,0,0,0,0}; + unsigned char curBytes[8]; + int resiBits; + unsigned char leadingNum; + + int reqBytesLength = reqLength/8; + int resiBitsLength = reqLength%8; + + for(i = 0; i<nbEle;i++) + { + // compute resiBits + resiBits = 0; + if (resiBitsLength != 0) { + int kMod8 = k % 8; + int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); + if (rightMovSteps > 0) { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (residualMidBits[p] & code) >> rightMovSteps; + } else if (rightMovSteps < 0) { + int code1 = getLeftMovingCode(kMod8); + int code2 = getRightMovingCode(kMod8, resiBitsLength); + int leftMovSteps = -rightMovSteps; + rightMovSteps = 8 - leftMovSteps; + resiBits = (residualMidBits[p] & code1) << leftMovSteps; + p++; + resiBits = resiBits + | ((residualMidBits[p] & code2) >> rightMovSteps); + } else // rightMovSteps == 0 + { + int code = getRightMovingCode(kMod8, resiBitsLength); + resiBits = (residualMidBits[p] & code); + p++; + } + k += resiBitsLength; + } + + // recover the exact data + memset(curBytes, 0, 8); + leadingNum = leadNum[l++]; + memcpy(curBytes, preBytes, leadingNum); + for (j = leadingNum; j < reqBytesLength; j++) + curBytes[j] = exactMidBytes[curByteIndex++]; + if (resiBitsLength != 0) { + unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); + curBytes[reqBytesLength] = resiByte; + } + + exactData = bytesToDouble(curBytes); + (*decData)[i] = exactData + medianValue; + memcpy(preBytes,curBytes,8); + } +} diff --git a/thirdparty/SZ/sz/src/sz.c b/thirdparty/SZ/sz/src/sz.c index 97cb00d..6cdc35b 100644 --- a/thirdparty/SZ/sz/src/sz.c +++ b/thirdparty/SZ/sz/src/sz.c @@ -22,6 +22,7 @@ #include "rw.h" #include "Huffman.h" #include "conf.h" +#include "utility.h" //#include 
"CurveFillingCompressStorage.h" int versionNumber[4] = {SZ_VER_MAJOR,SZ_VER_MINOR,SZ_VER_BUILD,SZ_VER_REVISION}; @@ -36,6 +37,8 @@ sz_params *confparams_dec = NULL; //used for decompression sz_exedata *exe_params = NULL; +int sz_with_regression = SZ_WITH_LINEAR_REGRESSION; //SZ_NO_REGRESSION + /*following global variables are desgined for time-series based compression*/ /*sz_varset is not used in the single-snapshot data compression*/ SZ_VarSet* sz_varset = NULL; @@ -69,31 +72,15 @@ int SZ_Init(const char *configFilePath) int SZ_Init_Params(sz_params *params) { - int x = 1; - char *y = (char*)&x; - int endianType = BIG_ENDIAN_SYSTEM; - if(*y==1) endianType = LITTLE_ENDIAN_SYSTEM; + SZ_Init(NULL); - sysEndianType = endianType; - exe_params->SZ_SIZE_TYPE = sizeof(size_t); + if(params->losslessCompressor!=GZIP_COMPRESSOR && params->losslessCompressor!=ZSTD_COMPRESSOR) + params->losslessCompressor = ZSTD_COMPRESSOR; - // set default values - if(params->max_quant_intervals > 0) + if(params->max_quant_intervals > 0) params->maxRangeRadius = params->max_quant_intervals/2; - else - params->max_quant_intervals = params->maxRangeRadius*2; - - exe_params->intvCapacity = params->maxRangeRadius*2; - exe_params->intvRadius = params->maxRangeRadius; - - if(params->quantization_intervals>0) - { - updateQuantizationInfo(params->quantization_intervals); - exe_params->optQuantMode = 0; - } - else - exe_params->optQuantMode = 1; - + + memcpy(confparams_cpr, params, sizeof(sz_params)); if(params->quantization_intervals%2!=0) { @@ -101,9 +88,6 @@ int SZ_Init_Params(sz_params *params) return SZ_NSCS; } - confparams_cpr = (sz_params*)malloc(sizeof(sz_params)); - memcpy(confparams_cpr, params, sizeof(sz_params)); - return SZ_SCES; } @@ -535,6 +519,14 @@ sz_metadata* SZ_getMetadata(unsigned char* bytes) isConstant = sameRByte & 0x01; //confparams_dec->szMode = (sameRByte & 0x06)>>1; isLossless = (sameRByte & 0x10)>>4; + + int isRandomAccess = (sameRByte >> 7) & 0x01; + + if(exe_params==NULL) + { + exe_params = (sz_exedata *)malloc(sizeof(struct sz_exedata)); + memset(exe_params, 0, sizeof(struct sz_exedata)); + } exe_params->SZ_SIZE_TYPE = ((sameRByte & 0x40)>>6)==1?8:4; sz_params* params = convertBytesToSZParams(&(bytes[index])); @@ -547,8 +539,8 @@ sz_metadata* SZ_getMetadata(unsigned char* bytes) index++; //jump to the dataLength info byte address dataSeriesLength = bytesToSize(&(bytes[index]));// 4 or 8 index += exe_params->SZ_SIZE_TYPE; - index += 4; //max_quant_intervals - + //index += 4; //max_quant_intervals + sz_metadata* metadata = (sz_metadata*)malloc(sizeof(struct sz_metadata)); metadata->versionNumber[0] = versions[0]; @@ -564,19 +556,27 @@ sz_metadata* SZ_getMetadata(unsigned char* bytes) int defactoNBBins = 0; //real # bins if(isConstant==0 && isLossless==0) { - int radExpoL = 0, segmentL = 0, pwrErrBoundBytesL = 0; - if(metadata->conf_params->errorBoundMode >= PW_REL) + if(isRandomAccess==1) { - radExpoL = 1; - segmentL = exe_params->SZ_SIZE_TYPE; - pwrErrBoundBytesL = 4; + unsigned char* raBytes = &(bytes[index]); + defactoNBBins = bytesToInt_bigEndian(raBytes + sizeof(int) + sizeof(double)); } - - int offset_typearray = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrErrBoundBytesL + 4 + 4 + 1 + 8 - + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE; - size_t nodeCount = bytesToInt_bigEndian(bytes+offset_typearray); - defactoNBBins = (nodeCount+1)/2; - } + else + { + int radExpoL = 0, segmentL = 0, pwrErrBoundBytesL = 0; + 
if(metadata->conf_params->errorBoundMode >= PW_REL) + { + radExpoL = 1; + segmentL = exe_params->SZ_SIZE_TYPE; + pwrErrBoundBytesL = 4; + } + + int offset_typearray = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrErrBoundBytesL + 4 + (4 + params->dataType*4) + 1 + 8 + + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + 4; + defactoNBBins = bytesToInt_bigEndian(bytes+offset_typearray); + } + + } metadata->defactoNBBins = defactoNBBins; return metadata; @@ -779,23 +779,6 @@ size_t compute_total_batch_size() return totalSize; } -int isZlibFormat(unsigned char magic1, unsigned char magic2) -{ - if(magic1==104&&magic2==5) //DC+BS - return 1; - if(magic1==104&&magic2==129) //DC+DC - return 1; - if(magic1==104&&magic2==222) //DC+BC - return 1; - if(magic1==120&&magic2==1) //BC+BS - return 1; - if(magic1==120&&magic2==156) //BC+DC - return 1; - if(magic1==120&&magic2==218) //BC+BS - return 1; - return 0; -} - void SZ_registerVar(char* varName, int dataType, void* data, int errBoundMode, double absErrBound, double relBoundRatio, double pwRelBoundRatio, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) diff --git a/thirdparty/SZ/sz/src/sz_double.c b/thirdparty/SZ/sz/src/sz_double.c index 51819bd..d8bf875 100644 --- a/thirdparty/SZ/sz/src/sz_double.c +++ b/thirdparty/SZ/sz/src/sz_double.c @@ -25,6 +25,7 @@ #include "zlib.h" #include "rw.h" #include "sz_double_ts.h" +#include "utility.h" unsigned char* SZ_skip_compress_double(double* data, size_t dataLength, size_t* outSize) { @@ -328,7 +329,7 @@ size_t dataLength, double realPrecision, double valueRangeSize, double medianVal //pred = 2*last3CmprsData[0] - last3CmprsData[1]; pred = last3CmprsData[0]; predAbsErr = fabs(curData - pred); - if(predAbsErr<=checkRadius) + if(predAbsErr<checkRadius) { state = (predAbsErr/realPrecision+1)/2; if(curData>=pred) @@ -1516,8 +1517,8 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio) { if(errBoundMode>=PW_REL) { - //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(newByteData, oriData, realPrecision, r1, outSize, min, max); - SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize); + SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(newByteData, oriData, pwrErrRatio, r1, outSize, min, max); + //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize); } else SZ_compress_args_double_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, medianValue); @@ -1562,7 +1563,7 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa exit(0); return SZ_NSCS; } - } + } int status = SZ_SCES; size_t dataLength = computeDataLength(r5,r4,r3,r2,r1); @@ -1600,13 +1601,12 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa { if(confparams_cpr->errorBoundMode>=PW_REL) { - //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(&tmpByteData, oriData, realPrecision, r1, &tmpOutSize, min, max); - SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, - valueRangeSize, medianValue, &tmpOutSize); + SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, min, max); + //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, 
r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, valueRangeSize, medianValue, &tmpOutSize); } else #ifdef HAVE_TIMECMPR - if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); else #endif @@ -1616,40 +1616,58 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa if (r3==0) { if(confparams_cpr->errorBoundMode>=PW_REL) - SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr(&tmpByteData, oriData, realPrecision, r2, r1, &tmpOutSize, min, max); + SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, min, max); else #ifdef HAVE_TIMECMPR if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); else #endif - SZ_compress_args_double_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + { + if(sz_with_regression == SZ_NO_REGRESSION) + SZ_compress_args_double_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else + tmpByteData = SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize); + } } else if (r4==0) { if(confparams_cpr->errorBoundMode>=PW_REL) - SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr(&tmpByteData, oriData, realPrecision, r3, r2, r1, &tmpOutSize, min, max); + SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, min, max); else #ifdef HAVE_TIMECMPR if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); else #endif - SZ_compress_args_double_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + { + if(sz_with_regression == SZ_NO_REGRESSION) + SZ_compress_args_double_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else + tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize); + } + + } else if (r5==0) { if(confparams_cpr->errorBoundMode>=PW_REL) - SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr(&tmpByteData, oriData, realPrecision, r4*r3, r2, r1, &tmpOutSize, min, max); + SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max); else #ifdef HAVE_TIMECMPR if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); else -#endif - SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); +#endif + { + if(sz_with_regression == SZ_NO_REGRESSION) + SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else + tmpByteData = 
SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize); + } + } else { @@ -1665,7 +1683,7 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa } else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) { - *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode); + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); free(tmpByteData); } else @@ -3121,11 +3139,8 @@ unsigned int optimize_intervals_double_3D_opt(double *oriData, size_t r1, size_t if(radiusIndex>=confparams_cpr->maxRangeRadius) { radiusIndex = confparams_cpr->maxRangeRadius - 1; - //printf("radiusIndex=%d\n", radiusIndex); } intervals[radiusIndex]++; - // printf("TEST: %ld, i: %ld\tj: %ld\tk: %ld\n", data_pos - oriData); - // fflush(stdout); offset_count += confparams_cpr->sampleDistance; if(offset_count >= r3){ n2_count ++; @@ -3141,9 +3156,6 @@ unsigned int optimize_intervals_double_3D_opt(double *oriData, size_t r1, size_t } else data_pos += confparams_cpr->sampleDistance; } - // printf("sample_count: %ld\n", sample_count); - // fflush(stdout); - // if(*max_freq < 0.15) *max_freq *= 2; //compute the appropriate number size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; size_t sum = 0; @@ -3161,7 +3173,6 @@ unsigned int optimize_intervals_double_3D_opt(double *oriData, size_t r1, size_t if(powerOf2<32) powerOf2 = 32; free(intervals); - //printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2); return powerOf2; } @@ -3172,7 +3183,7 @@ unsigned int optimize_intervals_double_2D_opt(double *oriData, size_t r1, size_t double pred_value = 0, pred_err; size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); - size_t totalSampleSize = 0;//(r1-1)*(r2-1)/confparams_cpr->sampleDistance; + size_t totalSampleSize = 0; size_t offset_count = confparams_cpr->sampleDistance - 1; // count r2 offset size_t offset_count_2; @@ -3226,12 +3237,11 @@ unsigned int optimize_intervals_double_1D_opt(double *oriData, size_t dataLength double pred_value = 0, pred_err; size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); - size_t totalSampleSize = 0;//dataLength/confparams_cpr->sampleDistance; + size_t totalSampleSize = 0; double * data_pos = oriData + 2; while(data_pos - oriData < dataLength){ totalSampleSize++; - //pred_value = 2*data_pos[-1] - data_pos[-2]; pred_value = data_pos[-1]; pred_err = fabs(pred_value - *data_pos); radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); @@ -3260,6 +3270,2063 @@ unsigned int optimize_intervals_double_1D_opt(double *oriData, size_t dataLength powerOf2 = 32; free(intervals); - //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2); return powerOf2; } + +/*The above code is for sz 1.4.13; the following code is for sz 2.0*/ +unsigned int optimize_intervals_double_2D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq) +{ + double mean = 0.0; + size_t len = r1 * r2; + size_t mean_distance = (int) (sqrt(len)); + + double * 
data_pos = oriData; + size_t mean_count = 0; + while(data_pos - oriData < len){ + mean += *data_pos; + mean_count ++; + data_pos += mean_distance; + } + if(mean_count > 0) mean /= mean_count; + size_t range = 8192; + size_t radius = 4096; + size_t * freq_intervals = (size_t *) malloc(range*sizeof(size_t)); + memset(freq_intervals, 0, range*sizeof(size_t)); + + unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius; + int sampleDistance = confparams_cpr->sampleDistance; + double predThreshold = confparams_cpr->predThreshold; + + size_t i; + size_t radiusIndex; + double pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, maxRangeRadius*sizeof(size_t)); + + double mean_diff; + ptrdiff_t freq_index; + size_t freq_count = 0; + size_t n1_count = 1; + size_t offset_count = sampleDistance - 1; + size_t offset_count_2 = 0; + size_t sample_count = 0; + data_pos = oriData + r2 + offset_count; + while(data_pos - oriData < len){ + pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1]; + pred_err = fabs(pred_value - *data_pos); + if(pred_err < realPrecision) freq_count ++; + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=maxRangeRadius) + radiusIndex = maxRangeRadius - 1; + intervals[radiusIndex]++; + + mean_diff = *data_pos - mean; + if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius; + else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius; + if(freq_index <= 0){ + freq_intervals[0] ++; + } + else if(freq_index >= range){ + freq_intervals[range - 1] ++; + } + else{ + freq_intervals[freq_index] ++; + } + offset_count += sampleDistance; + if(offset_count >= r2){ + n1_count ++; + offset_count_2 = n1_count % sampleDistance; + data_pos += (r2 + sampleDistance - offset_count) + (sampleDistance - offset_count_2); + offset_count = (sampleDistance - offset_count_2); + if(offset_count == 0) offset_count ++; + } + else data_pos += sampleDistance; + sample_count ++; + } + *max_freq = freq_count * 1.0/ sample_count; + + //compute the appropriate number + size_t targetCount = sample_count*predThreshold; + size_t sum = 0; + for(i=0;i<maxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=maxRangeRadius) + i = maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + // collect frequency + size_t max_sum = 0; + size_t max_index = 0; + size_t tmp_sum; + size_t * freq_pos = freq_intervals + 1; + for(size_t i=1; i<range-2; i++){ + tmp_sum = freq_pos[0] + freq_pos[1]; + if(tmp_sum > max_sum){ + max_sum = tmp_sum; + max_index = i; + } + freq_pos ++; + } + *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius); + *mean_freq = max_sum * 1.0 / sample_count; + + free(freq_intervals); + free(intervals); + return powerOf2; +} + +unsigned int optimize_intervals_double_3D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq) +{ + double mean = 0.0; + size_t len = r1 * r2 * r3; + size_t mean_distance = (int) (sqrt(len)); + double * data_pos = oriData; + size_t offset_count = 0; + size_t offset_count_2 = 0; + size_t mean_count = 0; + while(data_pos - oriData < len){ + mean += *data_pos; + mean_count ++; + data_pos += mean_distance; + offset_count += mean_distance; + offset_count_2 += mean_distance; + if(offset_count >= r3){ + 
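+ //the sampling offset has run past the fastest (r3) dimension: reset it and step the pointer back by one element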
offset_count = 0; + data_pos -= 1; + } + if(offset_count_2 >= r2 * r3){ + offset_count_2 = 0; + data_pos -= 1; + } + } + if(mean_count > 0) mean /= mean_count; + size_t range = 8192; + size_t radius = 4096; + size_t * freq_intervals = (size_t *) malloc(range*sizeof(size_t)); + memset(freq_intervals, 0, range*sizeof(size_t)); + + unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius; + int sampleDistance = confparams_cpr->sampleDistance; + double predThreshold = confparams_cpr->predThreshold; + + size_t i; + size_t radiusIndex; + size_t r23=r2*r3; + double pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, maxRangeRadius*sizeof(size_t)); + + double mean_diff; + ptrdiff_t freq_index; + size_t freq_count = 0; + size_t sample_count = 0; + + offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset + data_pos = oriData + r23 + r3 + offset_count; + size_t n1_count = 1, n2_count = 1; // count i,j sum + + while(data_pos - oriData < len){ + + pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1]; + pred_err = fabs(pred_value - *data_pos); + if(pred_err < realPrecision) freq_count ++; + radiusIndex = (pred_err/realPrecision+1)/2; + if(radiusIndex>=maxRangeRadius) + { + radiusIndex = maxRangeRadius - 1; + } + intervals[radiusIndex]++; + + mean_diff = *data_pos - mean; + if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius; + else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius; + if(freq_index <= 0){ + freq_intervals[0] ++; + } + else if(freq_index >= range){ + freq_intervals[range - 1] ++; + } + else{ + freq_intervals[freq_index] ++; + } + offset_count += sampleDistance; + if(offset_count >= r3){ + n2_count ++; + if(n2_count == r2){ + n1_count ++; + n2_count = 1; + data_pos += r3; + } + offset_count_2 = (n1_count + n2_count) % sampleDistance; + data_pos += (r3 + sampleDistance - offset_count) + (sampleDistance - offset_count_2); + offset_count = (sampleDistance - offset_count_2); + if(offset_count == 0) offset_count ++; + } + else data_pos += sampleDistance; + sample_count ++; + } + *max_freq = freq_count * 1.0/ sample_count; + + //compute the appropriate number + size_t targetCount = sample_count*predThreshold; + size_t sum = 0; + for(i=0;i<maxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=maxRangeRadius) + i = maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + // collect frequency + size_t max_sum = 0; + size_t max_index = 0; + size_t tmp_sum; + size_t * freq_pos = freq_intervals + 1; + for(size_t i=1; i<range-2; i++){ + tmp_sum = freq_pos[0] + freq_pos[1]; + if(tmp_sum > max_sum){ + max_sum = tmp_sum; + max_index = i; + } + freq_pos ++; + } + *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius); + *mean_freq = max_sum * 1.0 / sample_count; + + free(freq_intervals); + free(intervals); + return powerOf2; +} + +#define MIN(a, b) a<b? 
a : b +unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(double *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size){ + + unsigned int quantization_intervals; + double sz_sample_correct_freq = -1;//0.5; //-1 + double dense_pos; + double mean_flush_freq; + unsigned char use_mean = 0; + + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_double_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); + if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; + updateQuantizationInfo(quantization_intervals); + } + else{ + quantization_intervals = exe_params->intvCapacity; + } + + // calculate block dims + size_t num_x, num_y; + size_t block_size = 16; + + SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r1, num_x, block_size); + SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r2, num_y, block_size); + + size_t split_index_x, split_index_y; + size_t early_blockcount_x, early_blockcount_y; + size_t late_blockcount_x, late_blockcount_y; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + + size_t max_num_block_elements = early_blockcount_x * early_blockcount_y; + size_t num_blocks = num_x * num_y; + size_t num_elements = r1 * r2; + + size_t dim0_offset = r2; + + int * result_type = (int *) malloc(num_elements * sizeof(int)); + size_t unpred_data_max_size = max_num_block_elements; + double * result_unpredictable_data = (double *) malloc(unpred_data_max_size * sizeof(double) * num_blocks); + size_t total_unpred = 0; + size_t unpredictable_count; + double * data_pos = oriData; + int * type = result_type; + size_t offset_x, offset_y; + size_t current_blockcount_x, current_blockcount_y; + + double * reg_params = (double *) malloc(num_blocks * 4 * sizeof(double)); + double * reg_params_pos = reg_params; + // move regression part out + size_t params_offset_b = num_blocks; + size_t params_offset_c = 2*num_blocks; + for(size_t i=0; i<num_x; i++){ + for(size_t j=0; j<num_y; j++){ + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_y = (j < split_index_y) ? 
j * early_blockcount_y : j * late_blockcount_y + split_index_y; + + data_pos = oriData + offset_x * dim0_offset + offset_y; + + { + double * cur_data_pos = data_pos; + double fx = 0.0; + double fy = 0.0; + double f = 0; + double sum_x; + double curData; + for(size_t i=0; i<current_blockcount_x; i++){ + sum_x = 0; + for(size_t j=0; j<current_blockcount_y; j++){ + curData = *cur_data_pos; + sum_x += curData; + fy += curData * j; + cur_data_pos ++; + } + fx += sum_x * i; + f += sum_x; + cur_data_pos += dim0_offset - current_blockcount_y; + } + double coeff = 1.0 / (current_blockcount_x * current_blockcount_y); + reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1); + reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1); + reg_params_pos[params_offset_c] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2); + } + + reg_params_pos ++; + } + } + + //Compress coefficient arrays + double precision_a, precision_b, precision_c; + double rel_param_err = 0.15/3; + precision_a = rel_param_err * realPrecision / late_blockcount_x; + precision_b = rel_param_err * realPrecision / late_blockcount_y; + precision_c = rel_param_err * realPrecision; + + double mean = 0; + use_mean = 0; + if(use_mean){ + // compute mean + double sum = 0.0; + size_t mean_count = 0; + for(size_t i=0; i<num_elements; i++){ + if(fabs(oriData[i] - dense_pos) < realPrecision){ + sum += oriData[i]; + mean_count ++; + } + } + if(mean_count > 0) mean = sum / mean_count; + } + + + double tmp_realPrecision = realPrecision; + + // use two prediction buffers for higher performance + double * unpredictable_data = result_unpredictable_data; + unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); + memset(indicator, 0, num_blocks * sizeof(unsigned char)); + size_t reg_count = 0; + size_t strip_dim_0 = early_blockcount_x + 1; + size_t strip_dim_1 = r2 + 1; + size_t strip_dim0_offset = strip_dim_1; + unsigned char * indicator_pos = indicator; + size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(double); + double * prediction_buffer_1 = (double *) malloc(prediction_buffer_size); + memset(prediction_buffer_1, 0, prediction_buffer_size); + double * prediction_buffer_2 = (double *) malloc(prediction_buffer_size); + memset(prediction_buffer_2, 0, prediction_buffer_size); + double * cur_pb_buf = prediction_buffer_1; + double * next_pb_buf = prediction_buffer_2; + double * cur_pb_buf_pos; + double * next_pb_buf_pos; + int intvCapacity = exe_params->intvCapacity; + int intvRadius = exe_params->intvRadius; + int use_reg = 0; + + reg_params_pos = reg_params; + // compress the regression coefficients on the fly + double last_coeffcients[3] = {0.0}; + int coeff_intvCapacity_sz = 65536; + int coeff_intvRadius = coeff_intvCapacity_sz / 2; + int * coeff_type[3]; + int * coeff_result_type = (int *) malloc(num_blocks*3*sizeof(int)); + double * coeff_unpred_data[3]; + double * coeff_unpredictable_data = (double *) malloc(num_blocks*3*sizeof(double)); + double precision[3]; + precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c; + for(int i=0; i<3; i++){ + coeff_type[i] = coeff_result_type + i * num_blocks; + coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; + } + int coeff_index = 0; + unsigned int coeff_unpredictable_count[3] = {0}; + if(use_mean){ + type = result_type; 
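+ //codes 0 (unpredictable) and intvRadius (values flushed to the mean) are reserved, so the Lorenzo path below quantizes with intvCapacity - 2 bins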
+ int intvCapacity_sz = intvCapacity - 2; + for(size_t i=0; i<num_x; i++){ + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + data_pos = oriData + offset_x * dim0_offset; + + cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1; + next_pb_buf_pos = next_pb_buf + 1; + double * pb_pos = cur_pb_buf_pos; + double * next_pb_pos = next_pb_buf_pos; + + for(size_t j=0; j<num_y; j++){ + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + + /*sampling: decide which predictor to use (regression or lorenzo)*/ + { + double * cur_data_pos; + double curData; + double pred_reg, pred_sz; + double err_sz = 0.0, err_reg = 0.0; + // [1, 1] [3, 3] [5, 5] [7, 7] [9, 9] + // [1, 9] [3, 7] [7, 3] [9, 1] + int count = 0; + for(int i=1; i<current_blockcount_x; i+=2){ + cur_data_pos = data_pos + i * dim0_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c]; + + err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData)); + + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i * dim0_offset + (block_size - i); + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c]; + err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData)); + + err_reg += fabs(pred_reg - curData); + + count += 2; + } + + use_reg = (err_reg < err_sz); + } + if(use_reg) + { + { + /*predict coefficients in current block via previous reg_block*/ + double cur_coeff; + double diff, itvNum; + for(int e=0; e<3; e++){ + cur_coeff = reg_params_pos[e*num_blocks]; + diff = cur_coeff - last_coeffcients[e]; + itvNum = fabs(diff)/precision[e] + 1; + if (itvNum < coeff_intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; + last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + double curData; + double pred; + double itvNum; + double diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + double * cur_data_pos = data_pos; + for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ + for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee 
comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + /*dealing with the last jj (boundary)*/ + { + size_t jj = current_blockcount_y - 1; + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj] = pred; + index ++; + cur_data_pos ++; + } + cur_data_pos += dim0_offset - current_blockcount_y; + } + /*dealing with the last ii (boundary)*/ + { + size_t ii = current_blockcount_x - 1; + for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + // assign value to next prediction buffer + next_pb_pos[jj] = pred; + index ++; + cur_data_pos ++; + } + /*dealing with the last jj (boundary)*/ + { + size_t jj = current_blockcount_y - 1; + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj] = pred; + // assign value to next prediction buffer + next_pb_pos[jj] = pred; + + index ++; + cur_data_pos ++; + } + } // end ii == -1 + unpredictable_count = block_unpredictable_count; + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + reg_count ++; + }// end use_reg + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + double * cur_pb_pos = pb_pos; + double * cur_data_pos = data_pos; + double curData; + double pred2D; + double itvNum, 
diff; + size_t index = 0; + for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + curData = *cur_data_pos; + if(fabs(curData - mean) <= realPrecision){ + // adjust type[index] to intvRadius for coherence with freq in reg + type[index] = intvRadius; + *cur_pb_pos = mean; + } + else + { + pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; + diff = curData - pred2D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + if(type[index] <= intvRadius) type[index] -= 1; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim0_offset - current_blockcount_y; + cur_data_pos += dim0_offset - current_blockcount_y; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + for(size_t jj=0; jj<current_blockcount_y; jj++){ + curData = *cur_data_pos; + if(fabs(curData - mean) <= realPrecision){ + // adjust type[index] to intvRadius for coherence with freq in reg + type[index] = intvRadius; + *cur_pb_pos = mean; + } + else + { + pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; + diff = curData - pred2D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + if(type[index] <= intvRadius) type[index] -= 1; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + next_pb_pos[jj] = *cur_pb_pos; + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + } + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[j] = 1; + }// end SZ + reg_params_pos ++; + data_pos += current_blockcount_y; + pb_pos += current_blockcount_y; + next_pb_pos += current_blockcount_y; + type += current_blockcount_x * current_blockcount_y; + }// end j + indicator_pos += num_y; + double * tmp; + tmp = cur_pb_buf; + cur_pb_buf = next_pb_buf; + next_pb_buf = tmp; + }// end i + }// end use mean + else{ + type = result_type; + int intvCapacity_sz = intvCapacity - 2; + for(size_t i=0; i<num_x; i++){ + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + data_pos = oriData + offset_x * dim0_offset; + + cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1; + next_pb_buf_pos = next_pb_buf + 1; + double * pb_pos = cur_pb_buf_pos; + double * next_pb_pos = next_pb_buf_pos; + + for(size_t j=0; j<num_y; j++){ + offset_y = (j < split_index_y) ? 
j * early_blockcount_y : j * late_blockcount_y + split_index_y; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + /*sampling*/ + { + // sample [2i + 1, 2i + 1] [2i + 1, bs - 2i] + double * cur_data_pos; + double curData; + double pred_reg, pred_sz; + double err_sz = 0.0, err_reg = 0.0; + // [1, 1] [3, 3] [5, 5] [7, 7] [9, 9] + // [1, 9] [3, 7] [7, 3] [9, 1] + int count = 0; + for(int i=1; i<current_blockcount_x; i+=2){ + cur_data_pos = data_pos + i * dim0_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c]; + err_sz += fabs(pred_sz - curData); + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i * dim0_offset + (block_size - i); + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c]; + err_sz += fabs(pred_sz - curData); + err_reg += fabs(pred_reg - curData); + + count += 2; + } + err_sz += realPrecision * count * 0.81; + use_reg = (err_reg < err_sz); + + } + if(use_reg) + { + { + /*predict coefficients in current block via previous reg_block*/ + double cur_coeff; + double diff, itvNum; + for(int e=0; e<3; e++){ + cur_coeff = reg_params_pos[e*num_blocks]; + diff = cur_coeff - last_coeffcients[e]; + itvNum = fabs(diff)/precision[e] + 1; + if (itvNum < coeff_intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; + last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + double curData; + double pred; + double itvNum; + double diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + double * cur_data_pos = data_pos; + for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ + for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + /*dealing with the last jj (boundary)*/ + { + // jj == current_blockcount_y - 1 + size_t jj = current_blockcount_y - 1; + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = 
fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj] = pred; + index ++; + cur_data_pos ++; + } + cur_data_pos += dim0_offset - current_blockcount_y; + } + /*dealing with the last ii (boundary)*/ + { + size_t ii = current_blockcount_x - 1; + for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + // assign value to next prediction buffer + next_pb_pos[jj] = pred; + index ++; + cur_data_pos ++; + } + /*dealing with the last jj (boundary)*/ + { + // jj == current_blockcount_y - 1 + size_t jj = current_blockcount_y - 1; + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj] = pred; + // assign value to next prediction buffer + next_pb_pos[jj] = pred; + + index ++; + cur_data_pos ++; + } + } // end ii == -1 + unpredictable_count = block_unpredictable_count; + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + reg_count ++; + }// end use_reg + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + double * cur_pb_pos = pb_pos; + double * cur_data_pos = data_pos; + double curData; + double pred2D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + curData = *cur_data_pos; + + pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; + diff = curData - pred2D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession 
error against the case of machine-epsilon + if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim0_offset - current_blockcount_y; + cur_data_pos += dim0_offset - current_blockcount_y; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + for(size_t jj=0; jj<current_blockcount_y; jj++){ + curData = *cur_data_pos; + + pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; + diff = curData - pred2D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + next_pb_pos[jj] = *cur_pb_pos; + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + } + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[j] = 1; + }// end SZ + reg_params_pos ++; + data_pos += current_blockcount_y; + pb_pos += current_blockcount_y; + next_pb_pos += current_blockcount_y; + type += current_blockcount_x * current_blockcount_y; + }// end j + indicator_pos += num_y; + double * tmp; + tmp = cur_pb_buf; + cur_pb_buf = next_pb_buf; + next_pb_buf = tmp; + }// end i + } + free(prediction_buffer_1); + free(prediction_buffer_2); + + int stateNum = 2*quantization_intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + size_t nodeCount = 0; + size_t i = 0; + init(huffmanTree, result_type, num_elements); + for (i = 0; i < stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + + unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; + // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements + unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1); + unsigned char * result_pos = result; + initRandomAccessBytes(result_pos); + result_pos += meta_data_offset; + + sizeToBytes(result_pos, num_elements); + result_pos += exe_params->SZ_SIZE_TYPE; + + intToBytes_bigEndian(result_pos, block_size); + result_pos += sizeof(int); + doubleToBytes(result_pos, realPrecision); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, quantization_intervals); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, 
treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + + memcpy(result_pos, &use_mean, sizeof(unsigned char)); + result_pos += sizeof(unsigned char); + memcpy(result_pos, &mean, sizeof(double)); + result_pos += sizeof(double); + + size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); + result_pos += indicator_size; + + //convert the lead/mid/resi to byte stream + if(reg_count>0){ + for(int e=0; e<3; e++){ + int stateNum = 2*coeff_intvCapacity_sz; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + size_t nodeCount = 0; + init(huffmanTree, coeff_type[e], reg_count); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + doubleToBytes(result_pos, precision[e]); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, coeff_intvRadius); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + size_t typeArray_size = 0; + encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); + sizeToBytes(result_pos, typeArray_size); + result_pos += sizeof(size_t) + typeArray_size; + intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); + result_pos += sizeof(int); + memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(double)); + result_pos += coeff_unpredictable_count[e]*sizeof(double); + SZ_ReleaseHuffman(huffmanTree); + } + } + free(coeff_result_type); + free(coeff_unpredictable_data); + + //record the number of unpredictable data and also store them + memcpy(result_pos, &total_unpred, sizeof(size_t)); + result_pos += sizeof(size_t); + memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(double)); + result_pos += total_unpred * sizeof(double); + size_t typeArray_size = 0; + encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size); + result_pos += typeArray_size; + + size_t totalEncodeSize = result_pos - result; + free(indicator); + free(result_unpredictable_data); + free(result_type); + free(reg_params); + + SZ_ReleaseHuffman(huffmanTree); + *comp_size = totalEncodeSize; + + return result; +} + +unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size){ + + unsigned int quantization_intervals; + double sz_sample_correct_freq = -1;//0.5; //-1 + double dense_pos; + double mean_flush_freq; + unsigned char use_mean = 0; + + // calculate block dims + size_t num_x, num_y, num_z; + size_t block_size = 6; + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size); + + size_t split_index_x, split_index_y, split_index_z; + size_t early_blockcount_x, early_blockcount_y, early_blockcount_z; + size_t late_blockcount_x, late_blockcount_y, late_blockcount_z; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, 
early_blockcount_z, late_blockcount_z); + + size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z; + size_t num_blocks = num_x * num_y * num_z; + size_t num_elements = r1 * r2 * r3; + + size_t dim0_offset = r2 * r3; + size_t dim1_offset = r3; + + int * result_type = (int *) malloc(num_elements * sizeof(int)); + size_t unpred_data_max_size = max_num_block_elements; + double * result_unpredictable_data = (double *) malloc(unpred_data_max_size * sizeof(double) * num_blocks); + size_t total_unpred = 0; + size_t unpredictable_count; + size_t max_unpred_count = 0; + double * data_pos = oriData; + int * type = result_type; + size_t type_offset; + size_t offset_x, offset_y, offset_z; + size_t current_blockcount_x, current_blockcount_y, current_blockcount_z; + + double * reg_params = (double *) malloc(num_blocks * 4 * sizeof(double)); + double * reg_params_pos = reg_params; + // move regression part out + size_t params_offset_b = num_blocks; + size_t params_offset_c = 2*num_blocks; + size_t params_offset_d = 3*num_blocks; + for(size_t i=0; i<num_x; i++){ + for(size_t j=0; j<num_y; j++){ + for(size_t k=0; k<num_z; k++){ + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; + + data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset + offset_z; + /*Calculate regression coefficients*/ + { + double * cur_data_pos = data_pos; + double fx = 0.0; + double fy = 0.0; + double fz = 0.0; + double f = 0; + double sum_x, sum_y; + double curData; + for(size_t i=0; i<current_blockcount_x; i++){ + sum_x = 0; + for(size_t j=0; j<current_blockcount_y; j++){ + sum_y = 0; + for(size_t k=0; k<current_blockcount_z; k++){ + curData = *cur_data_pos; + // f += curData; + // fx += curData * i; + // fy += curData * j; + // fz += curData * k; + sum_y += curData; + fz += curData * k; + cur_data_pos ++; + } + fy += sum_y * j; + sum_x += sum_y; + cur_data_pos += dim1_offset - current_blockcount_z; + } + fx += sum_x * i; + f += sum_x; + cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + double coeff = 1.0 / (current_blockcount_x * current_blockcount_y * current_blockcount_z); + reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1); + reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1); + reg_params_pos[params_offset_c] = (2 * fz / (current_blockcount_z - 1) - f) * 6 * coeff / (current_blockcount_z + 1); + reg_params_pos[params_offset_d] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2 + (current_blockcount_z - 1) * reg_params_pos[params_offset_c] / 2); + } + reg_params_pos ++; + } + } + } + + //Compress coefficient arrays + double precision_a, precision_b, precision_c, precision_d; + double rel_param_err = 0.025; + precision_a = rel_param_err * realPrecision / late_blockcount_x; + precision_b = rel_param_err * realPrecision / late_blockcount_y; + precision_c = 
rel_param_err * realPrecision / late_blockcount_z; + precision_d = rel_param_err * realPrecision; + + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_double_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); + if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; + updateQuantizationInfo(quantization_intervals); + } + else{ + quantization_intervals = exe_params->intvCapacity; + } + + double mean = 0; + if(use_mean){ + // compute mean + double sum = 0.0; + size_t mean_count = 0; + for(size_t i=0; i<num_elements; i++){ + if(fabs(oriData[i] - dense_pos) < realPrecision){ + sum += oriData[i]; + mean_count ++; + } + } + if(mean_count > 0) mean = sum / mean_count; + } + + double tmp_realPrecision = realPrecision; + + // use two prediction buffers for higher performance + double * unpredictable_data = result_unpredictable_data; + unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); + memset(indicator, 0, num_blocks * sizeof(unsigned char)); + size_t reg_count = 0; + size_t strip_dim_0 = early_blockcount_x + 1; + size_t strip_dim_1 = r2 + 1; + size_t strip_dim_2 = r3 + 1; + size_t strip_dim0_offset = strip_dim_1 * strip_dim_2; + size_t strip_dim1_offset = strip_dim_2; + unsigned char * indicator_pos = indicator; + + size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(double); + double * prediction_buffer_1 = (double *) malloc(prediction_buffer_size); + memset(prediction_buffer_1, 0, prediction_buffer_size); + double * prediction_buffer_2 = (double *) malloc(prediction_buffer_size); + memset(prediction_buffer_2, 0, prediction_buffer_size); + double * cur_pb_buf = prediction_buffer_1; + double * next_pb_buf = prediction_buffer_2; + double * cur_pb_buf_pos; + double * next_pb_buf_pos; + int intvCapacity = exe_params->intvCapacity; + int intvRadius = exe_params->intvRadius; + int use_reg = 0; + double noise = realPrecision * 1.22; + + reg_params_pos = reg_params; + // compress the regression coefficients on the fly + double last_coeffcients[4] = {0.0}; + int coeff_intvCapacity_sz = 65536; + int coeff_intvRadius = coeff_intvCapacity_sz / 2; + int * coeff_type[4]; + int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int)); + double * coeff_unpred_data[4]; + double * coeff_unpredictable_data = (double *) malloc(num_blocks*4*sizeof(double)); + double precision[4]; + precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d; + for(int i=0; i<4; i++){ + coeff_type[i] = coeff_result_type + i * num_blocks; + coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; + } + int coeff_index = 0; + unsigned int coeff_unpredictable_count[4] = {0}; + + if(use_mean){ + int intvCapacity_sz = intvCapacity - 2; + for(size_t i=0; i<num_x; i++){ + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + for(size_t j=0; j<num_y; j++){ + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + current_blockcount_y = (j < split_index_y) ? 
early_blockcount_y : late_blockcount_y; + data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset; + type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset; + type = result_type + type_offset; + + // prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1) + cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1; + next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1; + + size_t current_blockcount_z; + double * pb_pos = cur_pb_buf_pos; + double * next_pb_pos = next_pb_buf_pos; + size_t strip_unpredictable_count = 0; + for(size_t k=0; k<num_z; k++){ + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + + /*sampling and decide which predictor*/ + { + // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4] + double * cur_data_pos; + double curData; + double pred_reg, pred_sz; + double err_sz = 0.0, err_reg = 0.0; + int bmi = 0; + if(i>0 && j>0 && k>0){ + for(int i=0; i<block_size; i++){ + cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + + bmi = block_size - i; + cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + 
} + } + else{ + for(int i=1; i<block_size; i++){ + cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + + bmi = block_size - i; + cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + + } + } + use_reg = (err_reg < err_sz); + } + if(use_reg){ + { + /*predict coefficients in current block via previous reg_block*/ + double cur_coeff; + double diff, itvNum; + for(int e=0; e<4; e++){ + cur_coeff = reg_params_pos[e*num_blocks]; + diff = cur_coeff - last_coeffcients[e]; + itvNum = fabs(diff)/precision[e] + 1; + if (itvNum < coeff_intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; + last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + double curData; + double pred; + double itvNum; + double diff; + 
size_t index = 0; + size_t block_unpredictable_count = 0; + double * cur_data_pos = data_pos; + for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; + diff = curData - pred; + itvNum = fabs(diff)/tmp_realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>tmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; + } + index ++; + cur_data_pos ++; + } + cur_data_pos += dim1_offset - current_blockcount_z; + } + cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + size_t ii = current_blockcount_x - 1; + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; + diff = curData - pred; + itvNum = fabs(diff)/tmp_realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>tmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + + if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; + } + // assign value to next prediction buffer + next_pb_pos[jj * strip_dim1_offset + kk] = pred; + index ++; + cur_data_pos ++; + } + cur_data_pos += dim1_offset - current_blockcount_z; + } + } + unpredictable_count = block_unpredictable_count; + strip_unpredictable_count += unpredictable_count; + unpredictable_data += unpredictable_count; + + reg_count ++; + } + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + double * cur_pb_pos = pb_pos; + double * cur_data_pos = data_pos; + double curData; + double pred3D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + + curData = *cur_data_pos; + if(fabs(curData - mean) <= realPrecision){ + // adjust type[index] to intvRadius for coherence with freq in reg + type[index] = intvRadius; + *cur_pb_pos = mean; + } + else + { + pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] + - 
cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; + diff = curData - pred3D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + if(type[index] <= intvRadius) type[index] -= 1; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim1_offset - current_blockcount_z; + cur_data_pos += dim1_offset - current_blockcount_z; + } + cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset; + cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + + curData = *cur_data_pos; + if(fabs(curData - mean) <= realPrecision){ + // adjust type[index] to intvRadius for coherence with freq in reg + type[index] = intvRadius; + *cur_pb_pos = mean; + } + else + { + pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] + - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; + diff = curData - pred3D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + if(type[index] <= intvRadius) type[index] -= 1; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos; + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim1_offset - current_blockcount_z; + cur_data_pos += dim1_offset - current_blockcount_z; + } + } + strip_unpredictable_count += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[k] = 1; + }// end SZ + + reg_params_pos ++; + data_pos += current_blockcount_z; + pb_pos += current_blockcount_z; + next_pb_pos += current_blockcount_z; + type += current_blockcount_x * current_blockcount_y * current_blockcount_z; + + } // end k + + if(strip_unpredictable_count > max_unpred_count){ + max_unpred_count = strip_unpredictable_count; + } + total_unpred += strip_unpredictable_count; + indicator_pos += num_z; + }// end j + double * tmp; + tmp = cur_pb_buf; + cur_pb_buf = next_pb_buf; + next_pb_buf = tmp; + }// end i + } + else{ + int intvCapacity_sz = intvCapacity - 2; + for(size_t i=0; i<num_x; i++){ + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + offset_x = (i < split_index_x) ? 
i * early_blockcount_x : i * late_blockcount_x + split_index_x; + + for(size_t j=0; j<num_y; j++){ + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset; + // copy bottom plane from plane buffer + // memcpy(prediction_buffer, bottom_buffer + offset_y * strip_dim1_offset, (current_blockcount_y + 1) * strip_dim1_offset * sizeof(double)); + type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset; + type = result_type + type_offset; + + // prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1) + cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1; + next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1; + + size_t current_blockcount_z; + double * pb_pos = cur_pb_buf_pos; + double * next_pb_pos = next_pb_buf_pos; + size_t strip_unpredictable_count = 0; + for(size_t k=0; k<num_z; k++){ + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + /*sampling*/ + { + // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4] + double * cur_data_pos; + double curData; + double pred_reg, pred_sz; + double err_sz = 0.0, err_reg = 0.0; + int bmi; + if(i>0 && j>0 && k>0){ + for(int i=0; i<block_size; i++){ + cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + + bmi = block_size - i; + cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - 
dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + } + } + else{ + for(int i=1; i<block_size; i++){ + cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + + bmi = block_size - i; + cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + } + } + use_reg = (err_reg < err_sz); + + } + if(use_reg) + { + { + /*predict coefficients in current block via previous reg_block*/ + double cur_coeff; + double diff, itvNum; + for(int e=0; e<4; e++){ + cur_coeff = reg_params_pos[e*num_blocks]; + diff = cur_coeff - last_coeffcients[e]; + itvNum = fabs(diff)/precision[e] + 1; + if (itvNum < coeff_intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; + last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + 
last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + double curData; + double pred; + double itvNum; + double diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + double * cur_data_pos = data_pos; + for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; + diff = curData - pred; + itvNum = fabs(diff)/tmp_realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>tmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + + if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; + } + index ++; + cur_data_pos ++; + } + cur_data_pos += dim1_offset - current_blockcount_z; + } + cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + size_t ii = current_blockcount_x - 1; + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; + diff = curData - pred; + itvNum = fabs(diff)/tmp_realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>tmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + + if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; + } + // assign value to next prediction buffer + next_pb_pos[jj * strip_dim1_offset + kk] = pred; + index ++; + cur_data_pos ++; + } + cur_data_pos += dim1_offset - current_blockcount_z; + } + } + unpredictable_count = block_unpredictable_count; + strip_unpredictable_count += unpredictable_count; + unpredictable_data += unpredictable_count; + reg_count ++; + } + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + double * cur_pb_pos = pb_pos; + double * cur_data_pos = data_pos; + double curData; + double pred3D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + + curData = *cur_data_pos; + pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] + - 
cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; + diff = curData - pred3D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim1_offset - current_blockcount_z; + cur_data_pos += dim1_offset - current_blockcount_z; + } + cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset; + cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + + curData = *cur_data_pos; + pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] + - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; + diff = curData - pred3D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + // assign value to next prediction buffer + next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos; + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim1_offset - current_blockcount_z; + cur_data_pos += dim1_offset - current_blockcount_z; + } + } + strip_unpredictable_count += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[k] = 1; + }// end SZ + + reg_params_pos ++; + data_pos += current_blockcount_z; + pb_pos += current_blockcount_z; + next_pb_pos += current_blockcount_z; + type += current_blockcount_x * current_blockcount_y * current_blockcount_z; + + } + + if(strip_unpredictable_count > max_unpred_count){ + max_unpred_count = strip_unpredictable_count; + } + total_unpred += strip_unpredictable_count; + indicator_pos += num_z; + } + double * tmp; + tmp = cur_pb_buf; + cur_pb_buf = next_pb_buf; + next_pb_buf = tmp; + } + } + + free(prediction_buffer_1); + free(prediction_buffer_2); + + int stateNum = 2*quantization_intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + size_t nodeCount = 0; + init(huffmanTree, result_type, num_elements); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, 
nodeCount, &treeBytes); + + unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; + // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements + unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1); + unsigned char * result_pos = result; + initRandomAccessBytes(result_pos); + + result_pos += meta_data_offset; + + sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8 + result_pos += exe_params->SZ_SIZE_TYPE; + + intToBytes_bigEndian(result_pos, block_size); + result_pos += sizeof(int); + doubleToBytes(result_pos, realPrecision); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, quantization_intervals); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + + memcpy(result_pos, &use_mean, sizeof(unsigned char)); + result_pos += sizeof(unsigned char); + memcpy(result_pos, &mean, sizeof(double)); + result_pos += sizeof(double); + size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); + result_pos += indicator_size; + + //convert the lead/mid/resi to byte stream + if(reg_count > 0){ + for(int e=0; e<4; e++){ + int stateNum = 2*coeff_intvCapacity_sz; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + size_t nodeCount = 0; + init(huffmanTree, coeff_type[e], reg_count); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + doubleToBytes(result_pos, precision[e]); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, coeff_intvRadius); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + size_t typeArray_size = 0; + encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); + sizeToBytes(result_pos, typeArray_size); + result_pos += sizeof(size_t) + typeArray_size; + intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); + result_pos += sizeof(int); + memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(double)); + result_pos += coeff_unpredictable_count[e]*sizeof(double); + SZ_ReleaseHuffman(huffmanTree); + } + } + free(coeff_result_type); + free(coeff_unpredictable_data); + + //record the number of unpredictable data and also store them + memcpy(result_pos, &total_unpred, sizeof(size_t)); + result_pos += sizeof(size_t); + memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(double)); + result_pos += total_unpred * sizeof(double); + size_t typeArray_size = 0; + encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size); + result_pos += typeArray_size; + size_t totalEncodeSize = result_pos - result; + 
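
[Editor's note, not part of the patch] For readers new to this code path, the pair of helpers below is a self-contained restatement of the linear-scaling quantization pattern that produces the type[] codes serialized in the streams above. It only mirrors what the loops in this file already do; the function names and signatures are invented for the sketch and are not SZ API. A code of 0 marks an unpredictable value that is stored verbatim, and any nonzero code places the reconstructed value within one error bound of the original.

#include <math.h>

/* Sketch only: 'precision' is the (possibly per-coefficient) error bound and
 * intvRadius is half the interval capacity, exactly as in the loops above. */
static int quantize_sketch(double cur, double pred, double precision,
                           int intvCapacity, int intvRadius,
                           double *decoded /* value the decompressor reproduces */)
{
    double diff = cur - pred;
    double itvNum = fabs(diff) / precision + 1;
    if (itvNum < intvCapacity) {
        if (diff < 0) itvNum = -itvNum;
        int type = (int)(itvNum / 2) + intvRadius;
        *decoded = pred + 2 * (type - intvRadius) * precision;
        if (fabs(cur - *decoded) <= precision)   /* guard against round-off */
            return type;
    }
    *decoded = cur;   /* unpredictable: stored exactly in the unpredictable array */
    return 0;
}

/* The decompressor inverts it with the same prediction: */
static double dequantize_sketch(int type, double pred, double precision,
                                int intvRadius, double storedExact)
{
    return type ? pred + 2 * (type - intvRadius) * precision : storedExact;
}

The same round trip is applied to the regression coefficients themselves (with their own precisions) and to the data values, which is why the serialized stream above only needs the Huffman-coded type arrays plus the raw unpredictable values.
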
free(indicator); + free(result_unpredictable_data); + free(result_type); + free(reg_params); + + + SZ_ReleaseHuffman(huffmanTree); + *comp_size = totalEncodeSize; + return result; +} diff --git a/thirdparty/SZ/sz/src/sz_double_pwr.c b/thirdparty/SZ/sz/src/sz_double_pwr.c index 59be38c..881d495 100644 --- a/thirdparty/SZ/sz/src/sz_double_pwr.c +++ b/thirdparty/SZ/sz/src/sz_double_pwr.c @@ -23,6 +23,7 @@ #include "sz_double_pwr.h" #include "zlib.h" #include "rw.h" +#include "utility.h" void compute_segment_precisions_double_1D(double *oriData, size_t dataLength, double* pwrErrBound, unsigned char* pwrErrBoundBytes, double globalPrecision) { @@ -1772,3 +1773,188 @@ size_t dataLength, double absErrBound, double relBoundRatio, double pwrErrRatio, free_TightDataPointStorageD(tdps); } + +#include <stdbool.h> + +void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log(unsigned char** newByteData, double *oriData, double pwrErrRatio, size_t dataLength, size_t *outSize, double min, double max){ + + double * log_data = (double *) malloc(dataLength * sizeof(double)); + + unsigned char * signs = (unsigned char *) malloc(dataLength); + memset(signs, 0, dataLength); + // preprocess + double max_abs_log_data; + if(min == 0) max_abs_log_data = fabs(log2(fabs(max))); + else if(max == 0) max_abs_log_data = fabs(log2(fabs(min))); + else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max))); + double min_log_data = max_abs_log_data; + bool positive = true; + for(size_t i=0; i<dataLength; i++){ + if(oriData[i] < 0){ + signs[i] = 1; + log_data[i] = -oriData[i]; + positive = false; + } + else + log_data[i] = oriData[i]; + if(log_data[i] > 0){ + log_data[i] = log2(log_data[i]); + if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i]; + if(log_data[i] < min_log_data) min_log_data = log_data[i]; + } + } + + double valueRangeSize, medianValue_f; + computeRangeSize_double(log_data, dataLength, &valueRangeSize, &medianValue_f); + if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data); + double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 2.23e-16; + for(size_t i=0; i<dataLength; i++){ + if(oriData[i] == 0){ + log_data[i] = min_log_data - 2.0001*realPrecision; + } + } + TightDataPointStorageD* tdps = SZ_compress_double_1D_MDQ(log_data, dataLength, realPrecision, valueRangeSize, medianValue_f); + tdps->minLogValue = min_log_data - 1.0001*realPrecision; + free(log_data); + if(!positive){ + unsigned char * comp_signs; + // compress signs + unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs); + tdps->pwrErrBoundBytes = comp_signs; + tdps->pwrErrBoundBytes_size = signSize; + } + else{ + tdps->pwrErrBoundBytes = NULL; + tdps->pwrErrBoundBytes_size = 0; + } + free(signs); + + convertTDPStoFlatBytes_double(tdps, newByteData, outSize); + if(*outSize>dataLength*sizeof(double)) + SZ_compress_args_double_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize); + + free_TightDataPointStorageD(tdps); +} + +void SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log(unsigned char** newByteData, double *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t *outSize, double min, double max){ + + size_t dataLength = r1 * r2; + double * log_data = (double *) malloc(dataLength * sizeof(double)); + + unsigned char * signs = (unsigned char *) malloc(dataLength); + memset(signs, 0, dataLength); + // preprocess + double 
max_abs_log_data; + if(min == 0) max_abs_log_data = fabs(log2(fabs(max))); + else if(max == 0) max_abs_log_data = fabs(log2(fabs(min))); + else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max))); + double min_log_data = max_abs_log_data; + bool positive = true; + for(size_t i=0; i<dataLength; i++){ + if(oriData[i] < 0){ + signs[i] = 1; + log_data[i] = -oriData[i]; + positive = false; + } + else + log_data[i] = oriData[i]; + if(log_data[i] > 0){ + log_data[i] = log2(log_data[i]); + if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i]; + if(log_data[i] < min_log_data) min_log_data = log_data[i]; + } + } + + double valueRangeSize, medianValue_f; + computeRangeSize_double(log_data, dataLength, &valueRangeSize, &medianValue_f); + if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data); + double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 2.23e-16; + for(size_t i=0; i<dataLength; i++){ + if(oriData[i] == 0){ + log_data[i] = min_log_data - 2.0001*realPrecision; + } + } + TightDataPointStorageD* tdps = SZ_compress_double_2D_MDQ(log_data, r1, r2, realPrecision, valueRangeSize, medianValue_f); + tdps->minLogValue = min_log_data - 1.0001*realPrecision; + free(log_data); + + if(!positive){ + unsigned char * comp_signs; + // compress signs + unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs); + tdps->pwrErrBoundBytes = comp_signs; + tdps->pwrErrBoundBytes_size = signSize; + } + else{ + tdps->pwrErrBoundBytes = NULL; + tdps->pwrErrBoundBytes_size = 0; + } + free(signs); + + convertTDPStoFlatBytes_double(tdps, newByteData, outSize); + if(*outSize>dataLength*sizeof(double)) + SZ_compress_args_double_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize); + + free_TightDataPointStorageD(tdps); +} + +void SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(unsigned char** newByteData, double *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max){ + + size_t dataLength = r1 * r2 * r3; + double * log_data = (double *) malloc(dataLength * sizeof(double)); + + unsigned char * signs = (unsigned char *) malloc(dataLength); + memset(signs, 0, dataLength); + // preprocess + double max_abs_log_data; + if(min == 0) max_abs_log_data = fabs(log2(fabs(max))); + else if(max == 0) max_abs_log_data = fabs(log2(fabs(min))); + else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? 
fabs(log2(fabs(min))) : fabs(log2(fabs(max))); + double min_log_data = max_abs_log_data; + bool positive = true; + for(size_t i=0; i<dataLength; i++){ + if(oriData[i] < 0){ + signs[i] = 1; + log_data[i] = -oriData[i]; + positive = false; + } + else + log_data[i] = oriData[i]; + if(log_data[i] > 0){ + log_data[i] = log2(log_data[i]); + if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i]; + if(log_data[i] < min_log_data) min_log_data = log_data[i]; + } + } + + double valueRangeSize, medianValue_f; + computeRangeSize_double(log_data, dataLength, &valueRangeSize, &medianValue_f); + if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data); + double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 2.23e-16; + for(size_t i=0; i<dataLength; i++){ + if(oriData[i] == 0){ + log_data[i] = min_log_data - 2.0001*realPrecision; + } + } + TightDataPointStorageD* tdps = SZ_compress_double_3D_MDQ(log_data, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f); + tdps->minLogValue = min_log_data - 1.0001*realPrecision; + free(log_data); + if(!positive){ + unsigned char * comp_signs; + // compress signs + unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs); + tdps->pwrErrBoundBytes = comp_signs; + tdps->pwrErrBoundBytes_size = signSize; + } + else{ + tdps->pwrErrBoundBytes = NULL; + tdps->pwrErrBoundBytes_size = 0; + } + free(signs); + + convertTDPStoFlatBytes_double(tdps, newByteData, outSize); + if(*outSize>dataLength*sizeof(double)) + SZ_compress_args_double_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize); + + free_TightDataPointStorageD(tdps); +} diff --git a/thirdparty/SZ/sz/src/sz_double_ts.c b/thirdparty/SZ/sz/src/sz_double_ts.c index b83562e..de9f7cd 100644 --- a/thirdparty/SZ/sz/src/sz_double_ts.c +++ b/thirdparty/SZ/sz/src/sz_double_ts.c @@ -66,7 +66,7 @@ unsigned int optimize_intervals_double_1D_ts(double *oriData, size_t dataLength, TightDataPointStorageD* SZ_compress_double_1D_MDQ_ts(double *oriData, size_t dataLength, sz_multisteps* multisteps, double realPrecision, double valueRangeSize, double medianValue_d) { -double* preStepData = (double*)(multisteps->hist_data); + double* preStepData = (double*)(multisteps->hist_data); //store the decompressed data double* decData = (double*)malloc(sizeof(double)*dataLength); memset(decData, 0, sizeof(double)*dataLength); diff --git a/thirdparty/SZ/sz/src/sz_float.c b/thirdparty/SZ/sz/src/sz_float.c index c0a2a18..74d7f20 100644 --- a/thirdparty/SZ/sz/src/sz_float.c +++ b/thirdparty/SZ/sz/src/sz_float.c @@ -1,6 +1,6 @@ /** * @file sz_float.c - * @author Sheng Di and Dingwen Tao + * @author Sheng Di, Dingwen Tao, Xin Liang * @date Aug, 2016 * @brief SZ_Init, Compression and Decompression functions * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. 
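
[Editor's note, not part of the patch] The *_pwr_pre_log routines added above all follow the same preprocessing recipe, and the float-path wiring in the hunks below switches to the float equivalents. The fragment here is a hedged sketch of that recipe (names invented, zero handling simplified) to make the intent explicit: signs are split off into a bitmap that is compressed losslessly, magnitudes are moved to the log2 domain, and the point-wise relative bound eps becomes an absolute bound of log2(1+eps) there, since |log2(x') - log2(x)| <= log2(1+eps) implies x'/x lies in [1/(1+eps), 1+eps], i.e. a relative error of at most eps.

#include <math.h>
#include <stddef.h>

/* Sketch of the pre-log preprocessing; returns the absolute bound to use in
 * the log2 domain. The patch additionally subtracts a small term proportional
 * to the largest |log2| value to absorb round-off in the log2/exp2 round trip,
 * and maps zeros to a sentinel just below the minimum log value. */
static double prelog_sketch(const double *data, size_t n, double eps,
                            double *logData, unsigned char *signs)
{
    for (size_t i = 0; i < n; i++) {
        signs[i] = (data[i] < 0);                  /* sign bitmap, losslessly compressed later */
        double mag = fabs(data[i]);
        logData[i] = (mag > 0) ? log2(mag) : 0.0;  /* real code substitutes a sentinel for zeros */
    }
    return log2(1.0 + eps);                        /* abs bound in log domain == rel bound eps */
}

/* Decompression applies exp2() to each value and negates where signs[i] is set. */
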
@@ -25,6 +25,7 @@ #include "zlib.h" #include "rw.h" #include "sz_float_ts.h" +#include "utility.h" unsigned char* SZ_skip_compress_float(float* data, size_t dataLength, size_t* outSize) { @@ -405,7 +406,7 @@ size_t dataLength, double realPrecision, float valueRangeSize, float medianValue //pred = 2*last3CmprsData[0] - last3CmprsData[1]; pred = last3CmprsData[0]; predAbsErr = fabs(curData - pred); - if(predAbsErr<=checkRadius) + if(predAbsErr<checkRadius) { state = (predAbsErr/realPrecision+1)/2; if(curData>=pred) @@ -1356,8 +1357,11 @@ char SZ_compress_args_float_NoCkRngeNoGzip_3D(unsigned char** newByteData, float compressionType = 1; //time-series based compression } else - { - tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f); + { + if(sz_with_regression == SZ_NO_REGRESSION) + tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f); + else + *newByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r1, r2, r3, realPrecision, outSize); compressionType = 0; //snapshot-based compression multisteps->lastSnapshotStep = timestep; } @@ -1366,14 +1370,14 @@ char SZ_compress_args_float_NoCkRngeNoGzip_3D(unsigned char** newByteData, float #endif tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f); + if(tdps!=NULL) + { + convertTDPStoFlatBytes_float(tdps, newByteData, outSize); + if(*outSize>dataLength*sizeof(float)) + SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); + free_TightDataPointStorageF(tdps); + } - convertTDPStoFlatBytes_float(tdps, newByteData, outSize); - - if(*outSize>dataLength*sizeof(float)) - SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); - - free_TightDataPointStorageF(tdps); - return compressionType; } @@ -1770,8 +1774,8 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio) { if(errBoundMode>=PW_REL) { - //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr(newByteData, oriData, realPrecision, r1, outSize, min, max); - SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize); + SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r1, outSize, min, max); + //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize); } else SZ_compress_args_float_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, medianValue); @@ -1779,21 +1783,21 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio) else if(r5==0&&r4==0&&r3==0) { if(errBoundMode>=PW_REL) - SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr(newByteData, oriData, realPrecision, r2, r1, outSize, min, max); + SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r2, r1, outSize, min, max); else SZ_compress_args_float_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, medianValue); } else if(r5==0&&r4==0) { if(errBoundMode>=PW_REL) - SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr(newByteData, oriData, realPrecision, r3, r2, r1, outSize, min, max); + SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r3, r2, r1, outSize, min, max); else SZ_compress_args_float_NoCkRngeNoGzip_3D(newByteData, 
oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue); } else if(r5==0) { if(errBoundMode>=PW_REL) - SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr(newByteData, oriData, realPrecision, r4*r3, r2, r1, outSize, min, max); + SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r4*r3, r2, r1, outSize, min, max); else SZ_compress_args_float_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue); } @@ -1854,9 +1858,8 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa { if(confparams_cpr->errorBoundMode>=PW_REL) { - //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr(&tmpByteData, oriData, realPrecision, r1, &tmpOutSize, min, max); - SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, - valueRangeSize, medianValue, &tmpOutSize); + SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, min, max); + //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, valueRangeSize, medianValue, &tmpOutSize); } else #ifdef HAVE_TIMECMPR @@ -1870,33 +1873,43 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa if (r3==0) { if(confparams_cpr->errorBoundMode>=PW_REL) - SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr(&tmpByteData, oriData, realPrecision, r2, r1, &tmpOutSize, min, max); + SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, min, max); else #ifdef HAVE_TIMECMPR if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); else #endif - SZ_compress_args_float_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + { + if(sz_with_regression == SZ_NO_REGRESSION) + SZ_compress_args_float_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else + tmpByteData = SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize); + } } else if (r4==0) { if(confparams_cpr->errorBoundMode>=PW_REL) - SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr(&tmpByteData, oriData, realPrecision, r3, r2, r1, &tmpOutSize, min, max); + SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, min, max); else #ifdef HAVE_TIMECMPR if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) - multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); else #endif - SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + { + if(sz_with_regression == SZ_NO_REGRESSION) + SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else + tmpByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize); 
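/* Note on the dispatch hunks above: this patch routes the float path onto the
 * SZ 2.0 entry points. When sz_with_regression is enabled, the 2D/3D/4D
 * snapshot cases call SZ_compress_float_2D/3D_MDQ_nonblocked_with_blocked_regression()
 * (added further down in this file) instead of the classic 1.4.x MDQ kernels,
 * with the 4D case reusing the 3D kernel by folding r4 into r3. The
 * point-wise-relative-error modes likewise switch from the old *_pwr /
 * *_pwrgroup routines to the *_pwr_pre_log variants, which take pwrErrRatio
 * directly rather than a precomputed realPrecision. */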
+ } } else if (r5==0) { if(confparams_cpr->errorBoundMode>=PW_REL) - SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr(&tmpByteData, oriData, realPrecision, r4*r3, r2, r1, &tmpOutSize, min, max); + SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max); //ToDO //SZ_compress_args_float_NoCkRngeNoGzip_4D_pwr(&tmpByteData, oriData, r4, r3, r2, r1, &tmpOutSize, min, max); else @@ -1905,7 +1918,12 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); else #endif - SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + { + if(sz_with_regression == SZ_NO_REGRESSION) + SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); + else + tmpByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize); + } } else { @@ -1920,7 +1938,7 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa } else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION || confparams_cpr->szMode==SZ_TEMPORAL_COMPRESSION) { - *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode); + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); free(tmpByteData); } else @@ -3374,7 +3392,7 @@ unsigned int optimize_intervals_float_3D_opt(float *oriData, size_t r1, size_t r float pred_value = 0, pred_err; size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); - size_t totalSampleSize = 0;//(r1-1)*(r2-1)*(r3-1)/confparams_cpr->sampleDistance; + size_t totalSampleSize = 0; size_t offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset size_t offset_count_2; @@ -3389,11 +3407,8 @@ unsigned int optimize_intervals_float_3D_opt(float *oriData, size_t r1, size_t r if(radiusIndex>=confparams_cpr->maxRangeRadius) { radiusIndex = confparams_cpr->maxRangeRadius - 1; - //printf("radiusIndex=%d\n", radiusIndex); } intervals[radiusIndex]++; - // printf("TEST: %ld, i: %ld\tj: %ld\tk: %ld\n", data_pos - oriData); - // fflush(stdout); offset_count += confparams_cpr->sampleDistance; if(offset_count >= r3){ n2_count ++; @@ -3409,9 +3424,6 @@ unsigned int optimize_intervals_float_3D_opt(float *oriData, size_t r1, size_t r } else data_pos += confparams_cpr->sampleDistance; } - // printf("sample_count: %ld\n", sample_count); - // fflush(stdout); - // if(*max_freq < 0.15) *max_freq *= 2; //compute the appropriate number size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; size_t sum = 0; @@ -3429,7 +3441,6 @@ unsigned int optimize_intervals_float_3D_opt(float *oriData, size_t r1, size_t r if(powerOf2<32) powerOf2 = 32; free(intervals); - //printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2); return powerOf2; } @@ -3749,10 +3760,7 @@ unsigned int optimize_intervals_float_2D_opt(float *oriData, size_t r1, size_t r float pred_value 
= 0, pred_err; size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); - size_t totalSampleSize = 0;//(r1-1)*(r2-1)/confparams_cpr->sampleDistance; - - //float max = oriData[0]; - //float min = oriData[0]; + size_t totalSampleSize = 0; size_t offset_count = confparams_cpr->sampleDistance - 1; // count r2 offset size_t offset_count_2; @@ -3811,7 +3819,6 @@ unsigned int optimize_intervals_float_1D_opt(float *oriData, size_t dataLength, float * data_pos = oriData + 2; while(data_pos - oriData < dataLength){ totalSampleSize++; - //pred_value = 2*data_pos[-1] - data_pos[-2]; pred_value = data_pos[-1]; pred_err = fabs(pred_value - *data_pos); radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); @@ -3840,7 +3847,6 @@ unsigned int optimize_intervals_float_1D_opt(float *oriData, size_t dataLength, powerOf2 = 32; free(intervals); - //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2); return powerOf2; } @@ -4036,3 +4042,2790 @@ size_t SZ_compress_float_2D_MDQ_RA_block(float * block_ori_data, float * mean, s return unpredictable_count; } +/*The above code is for sz 1.4.13; the following code is for sz 2.0*/ + +unsigned int optimize_intervals_float_2D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq) +{ + float mean = 0.0; + size_t len = r1 * r2; + size_t mean_distance = (int) (sqrt(len)); + + float * data_pos = oriData; + size_t mean_count = 0; + while(data_pos - oriData < len){ + mean += *data_pos; + mean_count ++; + data_pos += mean_distance; + } + if(mean_count > 0) mean /= mean_count; + size_t range = 8192; + size_t radius = 4096; + size_t * freq_intervals = (size_t *) malloc(range*sizeof(size_t)); + memset(freq_intervals, 0, range*sizeof(size_t)); + + unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius; + int sampleDistance = confparams_cpr->sampleDistance; + float predThreshold = confparams_cpr->predThreshold; + + size_t i; + size_t radiusIndex; + float pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, maxRangeRadius*sizeof(size_t)); + + float mean_diff; + ptrdiff_t freq_index; + size_t freq_count = 0; + size_t n1_count = 1; + size_t offset_count = sampleDistance - 1; + size_t offset_count_2 = 0; + size_t sample_count = 0; + data_pos = oriData + r2 + offset_count; + while(data_pos - oriData < len){ + pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1]; + pred_err = fabs(pred_value - *data_pos); + if(pred_err < realPrecision) freq_count ++; + radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); + if(radiusIndex>=maxRangeRadius) + radiusIndex = maxRangeRadius - 1; + intervals[radiusIndex]++; + + mean_diff = *data_pos - mean; + if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius; + else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius; + if(freq_index <= 0){ + freq_intervals[0] ++; + } + else if(freq_index >= range){ + freq_intervals[range - 1] ++; + } + else{ + freq_intervals[freq_index] ++; + } + offset_count += sampleDistance; + if(offset_count >= r2){ + n1_count ++; + offset_count_2 = n1_count % sampleDistance; + data_pos += (r2 + sampleDistance - offset_count) + (sampleDistance - offset_count_2); + offset_count = (sampleDistance - offset_count_2); + if(offset_count == 0) offset_count ++; + } + else data_pos += sampleDistance; + sample_count ++; + } + 
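/* The sampling pass above feeds the three results computed below:
 *   - max_freq: the fraction of sampled points whose 2D Lorenzo prediction
 *     error is already within realPrecision (how well plain Lorenzo does);
 *   - intervals[]: a histogram of quantization-bin indices, walked until a
 *     predThreshold fraction of the samples is covered, then rounded up to a
 *     power of two (minimum 32) to choose the interval capacity;
 *   - freq_intervals[]: a histogram of (value - mean)/realPrecision offsets,
 *     scanned for the densest pair of adjacent bins to produce dense_pos and
 *     mean_freq, which the caller uses to decide whether reserving a
 *     dedicated "mean flush" code is worthwhile. */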
*max_freq = freq_count * 1.0/ sample_count; + + //compute the appropriate number + size_t targetCount = sample_count*predThreshold; + size_t sum = 0; + for(i=0;i<maxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=maxRangeRadius) + i = maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + + // collect frequency + size_t max_sum = 0; + size_t max_index = 0; + size_t tmp_sum; + size_t * freq_pos = freq_intervals + 1; + for(size_t i=1; i<range-2; i++){ + tmp_sum = freq_pos[0] + freq_pos[1]; + if(tmp_sum > max_sum){ + max_sum = tmp_sum; + max_index = i; + } + freq_pos ++; + } + *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius); + *mean_freq = max_sum * 1.0 / sample_count; + + free(freq_intervals); + free(intervals); + return powerOf2; +} + +// 2D: modified for higher performance +#define MIN(a, b) a<b? a : b +unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(float *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size){ + + unsigned int quantization_intervals; + float sz_sample_correct_freq = -1;//0.5; //-1 + float dense_pos; + float mean_flush_freq; + unsigned char use_mean = 0; + + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_float_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); + if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; + updateQuantizationInfo(quantization_intervals); + } + else{ + quantization_intervals = exe_params->intvCapacity; + } + + // calculate block dims + size_t num_x, num_y; + size_t block_size = 16; + + SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r1, num_x, block_size); + SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r2, num_y, block_size); + + size_t split_index_x, split_index_y; + size_t early_blockcount_x, early_blockcount_y; + size_t late_blockcount_x, late_blockcount_y; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + + size_t max_num_block_elements = early_blockcount_x * early_blockcount_y; + size_t num_blocks = num_x * num_y; + size_t num_elements = r1 * r2; + + size_t dim0_offset = r2; + + int * result_type = (int *) malloc(num_elements * sizeof(int)); + size_t unpred_data_max_size = max_num_block_elements; + float * result_unpredictable_data = (float *) malloc(unpred_data_max_size * sizeof(float) * num_blocks); + size_t total_unpred = 0; + size_t unpredictable_count; + float * data_pos = oriData; + int * type = result_type; + size_t offset_x, offset_y; + size_t current_blockcount_x, current_blockcount_y; + + float * reg_params = (float *) malloc(num_blocks * 4 * sizeof(float)); + float * reg_params_pos = reg_params; + // move regression part out + size_t params_offset_b = num_blocks; + size_t params_offset_c = 2*num_blocks; + for(size_t i=0; i<num_x; i++){ + for(size_t j=0; j<num_y; j++){ + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_y = (j < split_index_y) ? 
j * early_blockcount_y : j * late_blockcount_y + split_index_y; + + data_pos = oriData + offset_x * dim0_offset + offset_y; + + { + float * cur_data_pos = data_pos; + float fx = 0.0; + float fy = 0.0; + float f = 0; + double sum_x; + float curData; + for(size_t i=0; i<current_blockcount_x; i++){ + sum_x = 0; + for(size_t j=0; j<current_blockcount_y; j++){ + curData = *cur_data_pos; + sum_x += curData; + fy += curData * j; + cur_data_pos ++; + } + fx += sum_x * i; + f += sum_x; + cur_data_pos += dim0_offset - current_blockcount_y; + } + float coeff = 1.0 / (current_blockcount_x * current_blockcount_y); + reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1); + reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1); + reg_params_pos[params_offset_c] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2); + } + + reg_params_pos ++; + } + } + + //Compress coefficient arrays + double precision_a, precision_b, precision_c; + float rel_param_err = 0.15/3; + precision_a = rel_param_err * realPrecision / late_blockcount_x; + precision_b = rel_param_err * realPrecision / late_blockcount_y; + precision_c = rel_param_err * realPrecision; + + float mean = 0; + use_mean = 0; + if(use_mean){ + // compute mean + double sum = 0.0; + size_t mean_count = 0; + for(size_t i=0; i<num_elements; i++){ + if(fabs(oriData[i] - dense_pos) < realPrecision){ + sum += oriData[i]; + mean_count ++; + } + } + if(mean_count > 0) mean = sum / mean_count; + } + + + double tmp_realPrecision = realPrecision; + + // use two prediction buffers for higher performance + float * unpredictable_data = result_unpredictable_data; + unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); + memset(indicator, 0, num_blocks * sizeof(unsigned char)); + size_t reg_count = 0; + size_t strip_dim_0 = early_blockcount_x + 1; + size_t strip_dim_1 = r2 + 1; + size_t strip_dim0_offset = strip_dim_1; + unsigned char * indicator_pos = indicator; + size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(float); + float * prediction_buffer_1 = (float *) malloc(prediction_buffer_size); + memset(prediction_buffer_1, 0, prediction_buffer_size); + float * prediction_buffer_2 = (float *) malloc(prediction_buffer_size); + memset(prediction_buffer_2, 0, prediction_buffer_size); + float * cur_pb_buf = prediction_buffer_1; + float * next_pb_buf = prediction_buffer_2; + float * cur_pb_buf_pos; + float * next_pb_buf_pos; + int intvCapacity = exe_params->intvCapacity; + int intvRadius = exe_params->intvRadius; + int use_reg = 0; + + reg_params_pos = reg_params; + // compress the regression coefficients on the fly + float last_coeffcients[3] = {0.0}; + int coeff_intvCapacity_sz = 65536; + int coeff_intvRadius = coeff_intvCapacity_sz / 2; + int * coeff_type[3]; + int * coeff_result_type = (int *) malloc(num_blocks*3*sizeof(int)); + float * coeff_unpred_data[3]; + float * coeff_unpredictable_data = (float *) malloc(num_blocks*3*sizeof(float)); + double precision[3]; + precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c; + for(int i=0; i<3; i++){ + coeff_type[i] = coeff_result_type + i * num_blocks; + coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; + } + int coeff_index = 0; + unsigned int coeff_unpredictable_count[3] = {0}; + if(use_mean){ + type = result_type; + int intvCapacity_sz 
= intvCapacity - 2; + for(size_t i=0; i<num_x; i++){ + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + data_pos = oriData + offset_x * dim0_offset; + + cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1; + next_pb_buf_pos = next_pb_buf + 1; + float * pb_pos = cur_pb_buf_pos; + float * next_pb_pos = next_pb_buf_pos; + + for(size_t j=0; j<num_y; j++){ + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + + /*sampling: decide which predictor to use (regression or lorenzo)*/ + { + float * cur_data_pos; + float curData; + float pred_reg, pred_sz; + float err_sz = 0.0, err_reg = 0.0; + // [1, 1] [3, 3] [5, 5] [7, 7] [9, 9] + // [1, 9] [3, 7] [7, 3] [9, 1] + int count = 0; + for(int i=1; i<current_blockcount_x; i+=2){ + cur_data_pos = data_pos + i * dim0_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c]; + + err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData)); + + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i * dim0_offset + (block_size - i); + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c]; + err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData)); + + err_reg += fabs(pred_reg - curData); + + count += 2; + } + + use_reg = (err_reg < err_sz); + } + if(use_reg) + { + { + /*predict coefficients in current block via previous reg_block*/ + float cur_coeff; + double diff, itvNum; + for(int e=0; e<3; e++){ + cur_coeff = reg_params_pos[e*num_blocks]; + diff = cur_coeff - last_coeffcients[e]; + itvNum = fabs(diff)/precision[e] + 1; + if (itvNum < coeff_intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; + last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + float curData; + float pred; + double itvNum; + double diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + float * cur_data_pos = data_pos; + for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ + for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of 
machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + /*dealing with the last jj (boundary)*/ + { + size_t jj = current_blockcount_y - 1; + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj] = pred; + index ++; + cur_data_pos ++; + } + cur_data_pos += dim0_offset - current_blockcount_y; + } + /*dealing with the last ii (boundary)*/ + { + size_t ii = current_blockcount_x - 1; + for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + // assign value to next prediction buffer + next_pb_pos[jj] = pred; + index ++; + cur_data_pos ++; + } + /*dealing with the last jj (boundary)*/ + { + size_t jj = current_blockcount_y - 1; + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj] = pred; + // assign value to next prediction buffer + next_pb_pos[jj] = pred; + + index ++; + cur_data_pos ++; + } + } // end ii == -1 + unpredictable_count = block_unpredictable_count; + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + reg_count ++; + }// end use_reg + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + float * cur_pb_pos = pb_pos; + float * cur_data_pos = data_pos; + float curData; + float pred2D; + double itvNum, diff; + size_t index = 0; + for(size_t 
ii=0; ii<current_blockcount_x - 1; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + curData = *cur_data_pos; + if(fabs(curData - mean) <= realPrecision){ + // adjust type[index] to intvRadius for coherence with freq in reg + type[index] = intvRadius; + *cur_pb_pos = mean; + } + else + { + pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; + diff = curData - pred2D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + if(type[index] <= intvRadius) type[index] -= 1; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim0_offset - current_blockcount_y; + cur_data_pos += dim0_offset - current_blockcount_y; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + for(size_t jj=0; jj<current_blockcount_y; jj++){ + curData = *cur_data_pos; + if(fabs(curData - mean) <= realPrecision){ + // adjust type[index] to intvRadius for coherence with freq in reg + type[index] = intvRadius; + *cur_pb_pos = mean; + } + else + { + pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; + diff = curData - pred2D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + if(type[index] <= intvRadius) type[index] -= 1; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + next_pb_pos[jj] = *cur_pb_pos; + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + } + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[j] = 1; + }// end SZ + reg_params_pos ++; + data_pos += current_blockcount_y; + pb_pos += current_blockcount_y; + next_pb_pos += current_blockcount_y; + type += current_blockcount_x * current_blockcount_y; + }// end j + indicator_pos += num_y; + float * tmp; + tmp = cur_pb_buf; + cur_pb_buf = next_pb_buf; + next_pb_buf = tmp; + }// end i + }// end use mean + else{ + type = result_type; + int intvCapacity_sz = intvCapacity - 2; + for(size_t i=0; i<num_x; i++){ + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + data_pos = oriData + offset_x * dim0_offset; + + cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1; + next_pb_buf_pos = next_pb_buf + 1; + float * pb_pos = cur_pb_buf_pos; + float * next_pb_pos = next_pb_buf_pos; + + for(size_t j=0; j<num_y; j++){ + offset_y = (j < split_index_y) ? 
j * early_blockcount_y : j * late_blockcount_y + split_index_y; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + /*sampling*/ + { + // sample [2i + 1, 2i + 1] [2i + 1, bs - 2i] + float * cur_data_pos; + float curData; + float pred_reg, pred_sz; + float err_sz = 0.0, err_reg = 0.0; + // [1, 1] [3, 3] [5, 5] [7, 7] [9, 9] + // [1, 9] [3, 7] [7, 3] [9, 1] + int count = 0; + for(int i=1; i<current_blockcount_x; i+=2){ + cur_data_pos = data_pos + i * dim0_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c]; + err_sz += fabs(pred_sz - curData); + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i * dim0_offset + (block_size - i); + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c]; + err_sz += fabs(pred_sz - curData); + err_reg += fabs(pred_reg - curData); + + count += 2; + } + err_sz += realPrecision * count * 0.81; + use_reg = (err_reg < err_sz); + + } + if(use_reg) + { + { + /*predict coefficients in current block via previous reg_block*/ + float cur_coeff; + double diff, itvNum; + for(int e=0; e<3; e++){ + cur_coeff = reg_params_pos[e*num_blocks]; + diff = cur_coeff - last_coeffcients[e]; + itvNum = fabs(diff)/precision[e] + 1; + if (itvNum < coeff_intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; + last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + float curData; + float pred; + double itvNum; + double diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + float * cur_data_pos = data_pos; + for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ + for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + /*dealing with the last jj (boundary)*/ + { + // jj == current_blockcount_y - 1 + size_t jj = current_blockcount_y - 1; + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = 
fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj] = pred; + index ++; + cur_data_pos ++; + } + cur_data_pos += dim0_offset - current_blockcount_y; + } + /*dealing with the last ii (boundary)*/ + { + size_t ii = current_blockcount_x - 1; + for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + // assign value to next prediction buffer + next_pb_pos[jj] = pred; + index ++; + cur_data_pos ++; + } + /*dealing with the last jj (boundary)*/ + { + // jj == current_blockcount_y - 1 + size_t jj = current_blockcount_y - 1; + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; + diff = curData - pred; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj] = pred; + // assign value to next prediction buffer + next_pb_pos[jj] = pred; + + index ++; + cur_data_pos ++; + } + } // end ii == -1 + unpredictable_count = block_unpredictable_count; + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + reg_count ++; + }// end use_reg + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + float * cur_pb_pos = pb_pos; + float * cur_data_pos = data_pos; + float curData; + float pred2D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + curData = *cur_data_pos; + + pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; + diff = curData - pred2D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error 
against the case of machine-epsilon + if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim0_offset - current_blockcount_y; + cur_data_pos += dim0_offset - current_blockcount_y; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + for(size_t jj=0; jj<current_blockcount_y; jj++){ + curData = *cur_data_pos; + + pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; + diff = curData - pred2D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + next_pb_pos[jj] = *cur_pb_pos; + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + } + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[j] = 1; + }// end SZ + reg_params_pos ++; + data_pos += current_blockcount_y; + pb_pos += current_blockcount_y; + next_pb_pos += current_blockcount_y; + type += current_blockcount_x * current_blockcount_y; + }// end j + indicator_pos += num_y; + float * tmp; + tmp = cur_pb_buf; + cur_pb_buf = next_pb_buf; + next_pb_buf = tmp; + }// end i + } + free(prediction_buffer_1); + free(prediction_buffer_2); + + int stateNum = 2*quantization_intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + size_t nodeCount = 0; + size_t i = 0; + init(huffmanTree, result_type, num_elements); + for (i = 0; i < stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + + unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; + // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements + unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1); + unsigned char * result_pos = result; + initRandomAccessBytes(result_pos); + result_pos += meta_data_offset; + + sizeToBytes(result_pos, num_elements); + result_pos += exe_params->SZ_SIZE_TYPE; + + intToBytes_bigEndian(result_pos, block_size); + result_pos += sizeof(int); + doubleToBytes(result_pos, realPrecision); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, quantization_intervals); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + 
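/* Serialization layout of the 2D stream written here and continued below:
 * random-access metadata, #elements, block_size, realPrecision, quantization
 * interval count, the Huffman tree for the quantization codes, then use_mean
 * and the mean value, the per-block regression/Lorenzo indicator packed one
 * bit per block, the Huffman-coded regression-coefficient streams (one per
 * coefficient, each with its own precision, interval radius and unpredictable
 * values), the unpredictable data values, and finally the Huffman-encoded
 * type array covering all elements. */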
result_pos += treeByteSize; + free(treeBytes); + + memcpy(result_pos, &use_mean, sizeof(unsigned char)); + result_pos += sizeof(unsigned char); + memcpy(result_pos, &mean, sizeof(float)); + result_pos += sizeof(float); + + size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); + result_pos += indicator_size; + + //convert the lead/mid/resi to byte stream + if(reg_count>0){ + for(int e=0; e<3; e++){ + int stateNum = 2*coeff_intvCapacity_sz; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + size_t nodeCount = 0; + init(huffmanTree, coeff_type[e], reg_count); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + doubleToBytes(result_pos, precision[e]); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, coeff_intvRadius); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + size_t typeArray_size = 0; + encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); + sizeToBytes(result_pos, typeArray_size); + result_pos += sizeof(size_t) + typeArray_size; + intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); + result_pos += sizeof(int); + memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float)); + result_pos += coeff_unpredictable_count[e]*sizeof(float); + SZ_ReleaseHuffman(huffmanTree); + } + } + free(coeff_result_type); + free(coeff_unpredictable_data); + + //record the number of unpredictable data and also store them + memcpy(result_pos, &total_unpred, sizeof(size_t)); + result_pos += sizeof(size_t); + memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float)); + result_pos += total_unpred * sizeof(float); + size_t typeArray_size = 0; + encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size); + result_pos += typeArray_size; + + size_t totalEncodeSize = result_pos - result; + free(indicator); + free(result_unpredictable_data); + free(result_type); + free(reg_params); + + SZ_ReleaseHuffman(huffmanTree); + *comp_size = totalEncodeSize; + + return result; +} + +unsigned int optimize_intervals_float_3D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq) +{ + float mean = 0.0; + size_t len = r1 * r2 * r3; + size_t mean_distance = (int) (sqrt(len)); + float * data_pos = oriData; + size_t offset_count = 0; + size_t offset_count_2 = 0; + size_t mean_count = 0; + while(data_pos - oriData < len){ + mean += *data_pos; + mean_count ++; + data_pos += mean_distance; + offset_count += mean_distance; + offset_count_2 += mean_distance; + if(offset_count >= r3){ + offset_count = 0; + data_pos -= 1; + } + if(offset_count_2 >= r2 * r3){ + offset_count_2 = 0; + data_pos -= 1; + } + } + if(mean_count > 0) mean /= mean_count; + size_t range = 8192; + size_t radius = 4096; + size_t * freq_intervals = (size_t *) malloc(range*sizeof(size_t)); + memset(freq_intervals, 0, range*sizeof(size_t)); + + unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius; + int sampleDistance = confparams_cpr->sampleDistance; 
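/* 3D counterpart of the 2D interval optimizer above: a strided pass over the
 * volume first estimates the mean, then the sampling pass below accumulates
 * the 3D Lorenzo prediction-error histogram (to size the quantization
 * intervals), the fraction of samples already predictable within
 * realPrecision (max_freq), and a histogram of (value - mean)/realPrecision
 * offsets used to locate the densest value band (dense_pos, mean_freq). */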
+ float predThreshold = confparams_cpr->predThreshold; + + size_t i; + size_t radiusIndex; + size_t r23=r2*r3; + float pred_value = 0, pred_err; + size_t *intervals = (size_t*)malloc(maxRangeRadius*sizeof(size_t)); + memset(intervals, 0, maxRangeRadius*sizeof(size_t)); + + float mean_diff; + ptrdiff_t freq_index; + size_t freq_count = 0; + size_t sample_count = 0; + + offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset + data_pos = oriData + r23 + r3 + offset_count; + size_t n1_count = 1, n2_count = 1; // count i,j sum + + while(data_pos - oriData < len){ + + pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1]; + pred_err = fabs(pred_value - *data_pos); + if(pred_err < realPrecision) freq_count ++; + radiusIndex = (pred_err/realPrecision+1)/2; + if(radiusIndex>=maxRangeRadius) + { + radiusIndex = maxRangeRadius - 1; + } + intervals[radiusIndex]++; + + mean_diff = *data_pos - mean; + if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius; + else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius; + if(freq_index <= 0){ + freq_intervals[0] ++; + } + else if(freq_index >= range){ + freq_intervals[range - 1] ++; + } + else{ + freq_intervals[freq_index] ++; + } + offset_count += sampleDistance; + if(offset_count >= r3){ + n2_count ++; + if(n2_count == r2){ + n1_count ++; + n2_count = 1; + data_pos += r3; + } + offset_count_2 = (n1_count + n2_count) % sampleDistance; + data_pos += (r3 + sampleDistance - offset_count) + (sampleDistance - offset_count_2); + offset_count = (sampleDistance - offset_count_2); + if(offset_count == 0) offset_count ++; + } + else data_pos += sampleDistance; + sample_count ++; + } + *max_freq = freq_count * 1.0/ sample_count; + + //compute the appropriate number + size_t targetCount = sample_count*predThreshold; + size_t sum = 0; + for(i=0;i<maxRangeRadius;i++) + { + sum += intervals[i]; + if(sum>targetCount) + break; + } + if(i>=maxRangeRadius) + i = maxRangeRadius-1; + unsigned int accIntervals = 2*(i+1); + unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); + + if(powerOf2<32) + powerOf2 = 32; + // collect frequency + size_t max_sum = 0; + size_t max_index = 0; + size_t tmp_sum; + size_t * freq_pos = freq_intervals + 1; + for(size_t i=1; i<range-2; i++){ + tmp_sum = freq_pos[0] + freq_pos[1]; + if(tmp_sum > max_sum){ + max_sum = tmp_sum; + max_index = i; + } + freq_pos ++; + } + *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius); + *mean_freq = max_sum * 1.0 / sample_count; + + free(freq_intervals); + free(intervals); + return powerOf2; +} + + +// 3D: modified for higher performance +unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size){ + +#ifdef HAVE_TIMECMPR + float* decData = NULL; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData = (float*)(multisteps->hist_data); +#endif + + unsigned int quantization_intervals; + float sz_sample_correct_freq = -1;//0.5; //-1 + float dense_pos; + float mean_flush_freq; + unsigned char use_mean = 0; + + // calculate block dims + size_t num_x, num_y, num_z; + size_t block_size = 6; + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size); + + size_t split_index_x, split_index_y, split_index_z; + size_t early_blockcount_x, 
early_blockcount_y, early_blockcount_z; + size_t late_blockcount_x, late_blockcount_y, late_blockcount_z; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z); + + size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z; + size_t num_blocks = num_x * num_y * num_z; + size_t num_elements = r1 * r2 * r3; + + size_t dim0_offset = r2 * r3; + size_t dim1_offset = r3; + + int * result_type = (int *) malloc(num_elements * sizeof(int)); + size_t unpred_data_max_size = max_num_block_elements; + float * result_unpredictable_data = (float *) malloc(unpred_data_max_size * sizeof(float) * num_blocks); + size_t total_unpred = 0; + size_t unpredictable_count; + size_t max_unpred_count = 0; + float * data_pos = oriData; + int * type = result_type; + size_t type_offset; + size_t offset_x, offset_y, offset_z; + size_t current_blockcount_x, current_blockcount_y, current_blockcount_z; + + float * reg_params = (float *) malloc(num_blocks * 4 * sizeof(float)); + float * reg_params_pos = reg_params; + // move regression part out + size_t params_offset_b = num_blocks; + size_t params_offset_c = 2*num_blocks; + size_t params_offset_d = 3*num_blocks; + for(size_t i=0; i<num_x; i++){ + for(size_t j=0; j<num_y; j++){ + for(size_t k=0; k<num_z; k++){ + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + offset_z = (k < split_index_z) ? 
k * early_blockcount_z : k * late_blockcount_z + split_index_z; + + data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset + offset_z; + /*Calculate regression coefficients*/ + { + float * cur_data_pos = data_pos; + float fx = 0.0; + float fy = 0.0; + float fz = 0.0; + float f = 0; + float sum_x, sum_y; + float curData; + for(size_t i=0; i<current_blockcount_x; i++){ + sum_x = 0; + for(size_t j=0; j<current_blockcount_y; j++){ + sum_y = 0; + for(size_t k=0; k<current_blockcount_z; k++){ + curData = *cur_data_pos; + // f += curData; + // fx += curData * i; + // fy += curData * j; + // fz += curData * k; + sum_y += curData; + fz += curData * k; + cur_data_pos ++; + } + fy += sum_y * j; + sum_x += sum_y; + cur_data_pos += dim1_offset - current_blockcount_z; + } + fx += sum_x * i; + f += sum_x; + cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + float coeff = 1.0 / (current_blockcount_x * current_blockcount_y * current_blockcount_z); + reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1); + reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1); + reg_params_pos[params_offset_c] = (2 * fz / (current_blockcount_z - 1) - f) * 6 * coeff / (current_blockcount_z + 1); + reg_params_pos[params_offset_d] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2 + (current_blockcount_z - 1) * reg_params_pos[params_offset_c] / 2); + } + reg_params_pos ++; + } + } + } + + //Compress coefficient arrays + double precision_a, precision_b, precision_c, precision_d; + float rel_param_err = 0.025; + precision_a = rel_param_err * realPrecision / late_blockcount_x; + precision_b = rel_param_err * realPrecision / late_blockcount_y; + precision_c = rel_param_err * realPrecision / late_blockcount_z; + precision_d = rel_param_err * realPrecision; + + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); + if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; + updateQuantizationInfo(quantization_intervals); + } + else{ + quantization_intervals = exe_params->intvCapacity; + } + + float mean = 0; + if(use_mean){ + // compute mean + double sum = 0.0; + size_t mean_count = 0; + for(size_t i=0; i<num_elements; i++){ + if(fabs(oriData[i] - dense_pos) < realPrecision){ + sum += oriData[i]; + mean_count ++; + } + } + if(mean_count > 0) mean = sum / mean_count; + } + + double tmp_realPrecision = realPrecision; + + // use two prediction buffers for higher performance + float * unpredictable_data = result_unpredictable_data; + unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); + memset(indicator, 0, num_blocks * sizeof(unsigned char)); + size_t reg_count = 0; + size_t strip_dim_0 = early_blockcount_x + 1; + size_t strip_dim_1 = r2 + 1; + size_t strip_dim_2 = r3 + 1; + size_t strip_dim0_offset = strip_dim_1 * strip_dim_2; + size_t strip_dim1_offset = strip_dim_2; + unsigned char * indicator_pos = indicator; + + size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(float); + float * prediction_buffer_1 = (float *) malloc(prediction_buffer_size); + memset(prediction_buffer_1, 0, prediction_buffer_size); + float * prediction_buffer_2 = (float *) 
malloc(prediction_buffer_size); + memset(prediction_buffer_2, 0, prediction_buffer_size); + float * cur_pb_buf = prediction_buffer_1; + float * next_pb_buf = prediction_buffer_2; + float * cur_pb_buf_pos; + float * next_pb_buf_pos; + int intvCapacity = exe_params->intvCapacity; + int intvRadius = exe_params->intvRadius; + int use_reg = 0; + float noise = realPrecision * 1.22; + + reg_params_pos = reg_params; + // compress the regression coefficients on the fly + float last_coeffcients[4] = {0.0}; + int coeff_intvCapacity_sz = 65536; + int coeff_intvRadius = coeff_intvCapacity_sz / 2; + int * coeff_type[4]; + int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int)); + float * coeff_unpred_data[4]; + float * coeff_unpredictable_data = (float *) malloc(num_blocks*4*sizeof(float)); + double precision[4]; + precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d; + for(int i=0; i<4; i++){ + coeff_type[i] = coeff_result_type + i * num_blocks; + coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; + } + int coeff_index = 0; + unsigned int coeff_unpredictable_count[4] = {0}; + + if(use_mean){ + int intvCapacity_sz = intvCapacity - 2; + for(size_t i=0; i<num_x; i++){ + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + for(size_t j=0; j<num_y; j++){ + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset; + type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset; + type = result_type + type_offset; + + // prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1) + cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1; + next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1; + + size_t current_blockcount_z; + float * pb_pos = cur_pb_buf_pos; + float * next_pb_pos = next_pb_buf_pos; + size_t strip_unpredictable_count = 0; + for(size_t k=0; k<num_z; k++){ + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; +#ifdef HAVE_TIMECMPR + size_t offset_z = 0; + offset_z = (k < split_index_z) ? 
k * early_blockcount_z : k * late_blockcount_z + split_index_z; + size_t block_offset = offset_x * dim0_offset + offset_y * dim1_offset + offset_z; +#endif + /*sampling and decide which predictor*/ + { + // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4] + float * cur_data_pos; + float curData; + float pred_reg, pred_sz; + float err_sz = 0.0, err_reg = 0.0; + int bmi = 0; + if(i>0 && j>0 && k>0){ + for(int i=0; i<block_size; i++){ + cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + + bmi = block_size - i; + cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + } + } + else{ + for(int i=1; i<block_size; i++){ + cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + + bmi = block_size - i; + 
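/* Predictor selection for the 3D path: for each 6x6x6 block, points along the
 * block diagonals ([i,i,i], [i,i,bs-i], [i,bs-i,i], [i,bs-i,bs-i]) are probed
 * with both predictors. The Lorenzo error is charged an extra
 * noise = 1.22*realPrecision to account for quantization error fed back
 * through already-compressed neighbours and, in this use_mean branch, competes
 * with flushing the point to the mean; the block is marked use_reg only if the
 * regression plane yields the smaller accumulated error. Blocks away from the
 * low domain boundary sample from i=0, while boundary blocks start at i=1
 * because the i=0 Lorenzo neighbours would fall outside the array. */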
cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + + } + } + use_reg = (err_reg < err_sz); + } + if(use_reg){ + { + /*predict coefficients in current block via previous reg_block*/ + float cur_coeff; + double diff, itvNum; + for(int e=0; e<4; e++){ + cur_coeff = reg_params_pos[e*num_blocks]; + diff = cur_coeff - last_coeffcients[e]; + itvNum = fabs(diff)/precision[e] + 1; + if (itvNum < coeff_intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; + last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + float curData; + float pred; + double itvNum; + double diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + float * cur_data_pos = data_pos; + for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; + diff = curData - pred; + itvNum = fabs(diff)/tmp_realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error 
against the case of machine-epsilon + if(fabs(curData - pred)>tmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + +#ifdef HAVE_TIMECMPR + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = pred; +#endif + + if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; + } + index ++; + cur_data_pos ++; + } + cur_data_pos += dim1_offset - current_blockcount_z; + } + cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + size_t ii = current_blockcount_x - 1; + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; + diff = curData - pred; + itvNum = fabs(diff)/tmp_realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>tmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + +#ifdef HAVE_TIMECMPR + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = pred; +#endif + + if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; + } + // assign value to next prediction buffer + next_pb_pos[jj * strip_dim1_offset + kk] = pred; + index ++; + cur_data_pos ++; + } + cur_data_pos += dim1_offset - current_blockcount_z; + } + } + unpredictable_count = block_unpredictable_count; + strip_unpredictable_count += unpredictable_count; + unpredictable_data += unpredictable_count; + + reg_count ++; + } + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + float * cur_pb_pos = pb_pos; + float * cur_data_pos = data_pos; + float curData; + float pred3D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + + curData = *cur_data_pos; + if(fabs(curData - mean) <= realPrecision){ + // adjust type[index] to intvRadius for coherence with freq in reg + type[index] = intvRadius; + *cur_pb_pos = mean; + } + else + { + pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] + - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; + diff = curData - pred3D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) 
(itvNum/2) + intvRadius; + *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + if(type[index] <= intvRadius) type[index] -= 1; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } +#ifdef HAVE_TIMECMPR + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = *cur_pb_pos; +#endif + + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim1_offset - current_blockcount_z; + cur_data_pos += dim1_offset - current_blockcount_z; + } + cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset; + cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + + curData = *cur_data_pos; + if(fabs(curData - mean) <= realPrecision){ + // adjust type[index] to intvRadius for coherence with freq in reg + type[index] = intvRadius; + *cur_pb_pos = mean; + } + else + { + pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] + - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; + diff = curData - pred3D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + if(type[index] <= intvRadius) type[index] -= 1; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } +#ifdef HAVE_TIMECMPR + size_t ii = current_blockcount_x - 1; + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = *cur_pb_pos; +#endif + + next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos; + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim1_offset - current_blockcount_z; + cur_data_pos += dim1_offset - current_blockcount_z; + } + } + strip_unpredictable_count += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[k] = 1; + }// end SZ + + reg_params_pos ++; + data_pos += current_blockcount_z; + pb_pos += current_blockcount_z; + next_pb_pos += current_blockcount_z; + type += current_blockcount_x * current_blockcount_y * current_blockcount_z; + + } // end k + + if(strip_unpredictable_count > max_unpred_count){ + max_unpred_count = strip_unpredictable_count; + } + total_unpred += strip_unpredictable_count; + indicator_pos += num_z; + }// end j + float * tmp; + tmp = cur_pb_buf; + cur_pb_buf = next_pb_buf; + next_pb_buf = tmp; + }// end i + } + else{ + int intvCapacity_sz = intvCapacity - 2; + for(size_t 
i=0; i<num_x; i++){ + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + + for(size_t j=0; j<num_y; j++){ + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset; + // copy bottom plane from plane buffer + // memcpy(prediction_buffer, bottom_buffer + offset_y * strip_dim1_offset, (current_blockcount_y + 1) * strip_dim1_offset * sizeof(float)); + type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset; + type = result_type + type_offset; + + // prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1) + cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1; + next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1; + + size_t current_blockcount_z; + float * pb_pos = cur_pb_buf_pos; + float * next_pb_pos = next_pb_buf_pos; + size_t strip_unpredictable_count = 0; + for(size_t k=0; k<num_z; k++){ + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; +#ifdef HAVE_TIMECMPR + size_t offset_z = 0; + offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; + size_t block_offset = offset_x * dim0_offset + offset_y * dim1_offset + offset_z; +#endif + /*sampling*/ + { + // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4] + float * cur_data_pos; + float curData; + float pred_reg, pred_sz; + float err_sz = 0.0, err_reg = 0.0; + int bmi; + if(i>0 && j>0 && k>0){ + for(int i=0; i<block_size; i++){ + cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + + bmi = block_size - i; + cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 
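Whichever predictor wins, every value in these loops goes through the same error-bounded linear-scaling quantizer: the prediction error is mapped to a bin of width 2*realPrecision, the value is reconstructed from the bin center, and if the bin index would overflow the interval capacity, or round-off pushes the reconstruction outside the bound, the raw value is emitted as "unpredictable" under code 0. A compact, self-contained sketch of that step (parameter and helper names are illustrative, not SZ's):

#include <math.h>
#include <stddef.h>

/* Hedged sketch of the linear-scaling quantizer used after every prediction
 * above.  Returns the integer code (0 means "unpredictable") and writes the
 * reconstructed value, which later predictions must be based on. */
static int quantize_value(float cur, float pred, double eb,
                          int capacity, int radius,
                          float *recon, float *unpred, size_t *unpred_count)
{
    double diff = cur - pred;
    double itv = fabs(diff) / eb + 1;
    if (itv < capacity) {
        if (diff < 0) itv = -itv;
        int code = (int)(itv / 2) + radius;
        float dec = pred + 2 * (code - radius) * (float)eb;
        /* guard against machine-epsilon violations of the error bound */
        if (fabs(cur - dec) <= eb) {
            *recon = dec;
            return code;
        }
    }
    unpred[(*unpred_count)++] = cur;   /* fall back to storing the raw value */
    *recon = cur;
    return 0;
}

The reconstructed value, not the original, is what neighboring points later predict from, which is why the prediction buffers are overwritten with it.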
+ err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + } + } + else{ + for(int i=1; i<block_size; i++){ + cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + + bmi = block_size - i; + cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + + cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + } + } + use_reg = (err_reg < err_sz); + + } + if(use_reg) + { + { + /*predict coefficients in current block via previous reg_block*/ + float cur_coeff; + double diff, itvNum; + for(int e=0; e<4; e++){ + cur_coeff = reg_params_pos[e*num_blocks]; + diff = cur_coeff - last_coeffcients[e]; + itvNum = fabs(diff)/precision[e] + 1; + if (itvNum < coeff_intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; + last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] 
- coeff_intvRadius) * precision[e]; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + float curData; + float pred; + double itvNum; + double diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + float * cur_data_pos = data_pos; + for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; + diff = curData - pred; + itvNum = fabs(diff)/tmp_realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>tmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + +#ifdef HAVE_TIMECMPR + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = pred; +#endif + + + if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; + } + index ++; + cur_data_pos ++; + } + cur_data_pos += dim1_offset - current_blockcount_z; + } + cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + size_t ii = current_blockcount_x - 1; + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; + diff = curData - pred; + itvNum = fabs(diff)/tmp_realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>tmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + +#ifdef HAVE_TIMECMPR + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = pred; +#endif + + if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ + // assign value to block surfaces + pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; + } + // assign value to next prediction buffer + next_pb_pos[jj * strip_dim1_offset + kk] = pred; + index ++; + cur_data_pos ++; + } + cur_data_pos += 
dim1_offset - current_blockcount_z; + } + } + unpredictable_count = block_unpredictable_count; + strip_unpredictable_count += unpredictable_count; + unpredictable_data += unpredictable_count; + reg_count ++; + } + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + float * cur_pb_pos = pb_pos; + float * cur_data_pos = data_pos; + float curData; + float pred3D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + + curData = *cur_data_pos; + pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] + - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; + diff = curData - pred3D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + +#ifdef HAVE_TIMECMPR + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = *cur_pb_pos; +#endif + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim1_offset - current_blockcount_z; + cur_data_pos += dim1_offset - current_blockcount_z; + } + cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset; + cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + /*dealing with the last ii (boundary)*/ + { + // ii == current_blockcount_x - 1 + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + + curData = *cur_data_pos; + pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] + - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; + diff = curData - pred3D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_pb_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + +#ifdef HAVE_TIMECMPR + size_t ii = current_blockcount_x - 1; + size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; + if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) + decData[block_offset + point_offset] = *cur_pb_pos; +#endif + + // assign value to next prediction buffer + next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos; + index ++; + cur_pb_pos ++; + cur_data_pos ++; + } + cur_pb_pos += strip_dim1_offset - 
current_blockcount_z; + cur_data_pos += dim1_offset - current_blockcount_z; + } + } + strip_unpredictable_count += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[k] = 1; + }// end SZ + + reg_params_pos ++; + data_pos += current_blockcount_z; + pb_pos += current_blockcount_z; + next_pb_pos += current_blockcount_z; + type += current_blockcount_x * current_blockcount_y * current_blockcount_z; + + } + + if(strip_unpredictable_count > max_unpred_count){ + max_unpred_count = strip_unpredictable_count; + } + total_unpred += strip_unpredictable_count; + indicator_pos += num_z; + } + float * tmp; + tmp = cur_pb_buf; + cur_pb_buf = next_pb_buf; + next_pb_buf = tmp; + } + } + + free(prediction_buffer_1); + free(prediction_buffer_2); + + int stateNum = 2*quantization_intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + size_t nodeCount = 0; + init(huffmanTree, result_type, num_elements); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + + unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; + // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements + unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1); + unsigned char * result_pos = result; + initRandomAccessBytes(result_pos); + + result_pos += meta_data_offset; + + sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8 + result_pos += exe_params->SZ_SIZE_TYPE; + + intToBytes_bigEndian(result_pos, block_size); + result_pos += sizeof(int); + doubleToBytes(result_pos, realPrecision); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, quantization_intervals); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + + memcpy(result_pos, &use_mean, sizeof(unsigned char)); + result_pos += sizeof(unsigned char); + memcpy(result_pos, &mean, sizeof(float)); + result_pos += sizeof(float); + size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); + result_pos += indicator_size; + + //convert the lead/mid/resi to byte stream + if(reg_count > 0){ + for(int e=0; e<4; e++){ + int stateNum = 2*coeff_intvCapacity_sz; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + size_t nodeCount = 0; + init(huffmanTree, coeff_type[e], reg_count); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + doubleToBytes(result_pos, precision[e]); + result_pos += sizeof(double); + intToBytes_bigEndian(result_pos, coeff_intvRadius); + result_pos += sizeof(int); + intToBytes_bigEndian(result_pos, treeByteSize); + result_pos += sizeof(int); + 
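The output assembly packs fixed-width header fields back to back behind a moving cursor: element count, block size, the double-precision error bound, the quantization interval count, and then the serialized Huffman tree, each written big-endian by SZ's ByteToolkit helpers (intToBytes_bigEndian, doubleToBytes, sizeToBytes). A stand-in for that cursor pattern, using hypothetical helper names rather than the library's:

#include <stdint.h>
#include <string.h>

/* Hedged sketch of the cursor-style serialization used when assembling the
 * compressed stream: append a big-endian uint32, then advance the cursor. */
static unsigned char *put_u32_be(unsigned char *pos, uint32_t v)
{
    pos[0] = (unsigned char)(v >> 24);
    pos[1] = (unsigned char)(v >> 16);
    pos[2] = (unsigned char)(v >> 8);
    pos[3] = (unsigned char)(v);
    return pos + 4;
}

/* The double is copied byte-for-byte in this sketch; SZ's doubleToBytes also
 * normalizes the machine's endianness explicitly. */
static unsigned char *put_double(unsigned char *pos, double d)
{
    memcpy(pos, &d, sizeof d);
    return pos + sizeof d;
}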
intToBytes_bigEndian(result_pos, nodeCount); + result_pos += sizeof(int); + memcpy(result_pos, treeBytes, treeByteSize); + result_pos += treeByteSize; + free(treeBytes); + size_t typeArray_size = 0; + encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); + sizeToBytes(result_pos, typeArray_size); + result_pos += sizeof(size_t) + typeArray_size; + intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); + result_pos += sizeof(int); + memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float)); + result_pos += coeff_unpredictable_count[e]*sizeof(float); + SZ_ReleaseHuffman(huffmanTree); + } + } + free(coeff_result_type); + free(coeff_unpredictable_data); + + //record the number of unpredictable data and also store them + memcpy(result_pos, &total_unpred, sizeof(size_t)); + result_pos += sizeof(size_t); + memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float)); + result_pos += total_unpred * sizeof(float); + size_t typeArray_size = 0; + encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size); + result_pos += typeArray_size; + size_t totalEncodeSize = result_pos - result; + free(indicator); + free(result_unpredictable_data); + free(result_type); + free(reg_params); + + + SZ_ReleaseHuffman(huffmanTree); + *comp_size = totalEncodeSize; + return result; +} + +unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size){ + + unsigned int quantization_intervals; + float sz_sample_correct_freq = -1;//0.5; //-1 + float dense_pos; + float mean_flush_freq; + unsigned char use_mean = 0; + + // calculate block dims + size_t num_x, num_y, num_z; + size_t block_size = 6; + num_x = (r1 - 1) / block_size + 1; + num_y = (r2 - 1) / block_size + 1; + num_z = (r3 - 1) / block_size + 1; + + size_t max_num_block_elements = block_size * block_size * block_size; + size_t num_blocks = num_x * num_y * num_z; + size_t num_elements = r1 * r2 * r3; + + size_t dim0_offset = r2 * r3; + size_t dim1_offset = r3; + + int * result_type = (int *) malloc(num_blocks*max_num_block_elements * sizeof(int)); + size_t unpred_data_max_size = max_num_block_elements; + float * result_unpredictable_data = (float *) malloc(unpred_data_max_size * sizeof(float) * num_blocks); + size_t total_unpred = 0; + size_t unpredictable_count; + float * data_pos = oriData; + int * type = result_type; + float * reg_params = (float *) malloc(num_blocks * 4 * sizeof(float)); + float * reg_params_pos = reg_params; + // move regression part out + size_t params_offset_b = num_blocks; + size_t params_offset_c = 2*num_blocks; + size_t params_offset_d = 3*num_blocks; + float * pred_buffer = (float *) malloc((block_size+1)*(block_size+1)*(block_size+1)*sizeof(float)); + float * pred_buffer_pos = NULL; + float * block_data_pos_x = NULL; + float * block_data_pos_y = NULL; + float * block_data_pos_z = NULL; + for(size_t i=0; i<num_x; i++){ + for(size_t j=0; j<num_y; j++){ + for(size_t k=0; k<num_z; k++){ + data_pos = oriData + i*block_size * dim0_offset + j*block_size * dim1_offset + k*block_size; + pred_buffer_pos = pred_buffer; + block_data_pos_x = data_pos; + // use the buffer as block_size*block_size*block_size + for(int ii=0; ii<block_size; ii++){ + block_data_pos_y = block_data_pos_x; + for(int jj=0; jj<block_size; jj++){ + block_data_pos_z = block_data_pos_y; + for(int kk=0; kk<block_size; kk++){ + *pred_buffer_pos = *block_data_pos_z; + 
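When a 6x6x6 block overhangs the grid boundary, the copy loop above pads it by simply not advancing the source pointer once the index runs past r1/r2/r3, so the last valid sample is replicated into the remaining cells. The same clamping idea for a single dimension, as a self-contained sketch with illustrative names:

#include <stddef.h>

/* Hedged sketch: copy block_size samples starting at 'start' out of a row of
 * length n, repeating the last in-range sample once the row ends. */
static void gather_row_clamped(const float *row, size_t n,
                               size_t start, size_t block_size, float *dst)
{
    const float *src = row + start;
    for (size_t k = 0; k < block_size; k++) {
        dst[k] = *src;
        /* only advance while the next index is still inside the row */
        if (start + k + 1 < n)
            src++;
    }
}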
if(k*block_size + kk + 1 < r3) block_data_pos_z ++; + pred_buffer_pos ++; + } + if(j*block_size + jj + 1 < r2) block_data_pos_y += dim1_offset; + } + if(i*block_size + ii + 1 < r1) block_data_pos_x += dim0_offset; + } + /*Calculate regression coefficients*/ + { + float * cur_data_pos = pred_buffer; + float fx = 0.0; + float fy = 0.0; + float fz = 0.0; + float f = 0; + float sum_x, sum_y; + float curData; + for(size_t i=0; i<block_size; i++){ + sum_x = 0; + for(size_t j=0; j<block_size; j++){ + sum_y = 0; + for(size_t k=0; k<block_size; k++){ + curData = *cur_data_pos; + sum_y += curData; + fz += curData * k; + cur_data_pos ++; + } + fy += sum_y * j; + sum_x += sum_y; + } + fx += sum_x * i; + f += sum_x; + } + float coeff = 1.0 / (block_size * block_size * block_size); + reg_params_pos[0] = (2 * fx / (block_size - 1) - f) * 6 * coeff / (block_size + 1); + reg_params_pos[params_offset_b] = (2 * fy / (block_size - 1) - f) * 6 * coeff / (block_size + 1); + reg_params_pos[params_offset_c] = (2 * fz / (block_size - 1) - f) * 6 * coeff / (block_size + 1); + reg_params_pos[params_offset_d] = f * coeff - ((block_size - 1) * reg_params_pos[0] / 2 + (block_size - 1) * reg_params_pos[params_offset_b] / 2 + (block_size - 1) * reg_params_pos[params_offset_c] / 2); + } + reg_params_pos ++; + } + } + } + + //Compress coefficient arrays + double precision_a, precision_b, precision_c, precision_d; + float rel_param_err = 0.025; + precision_a = rel_param_err * realPrecision / block_size; + precision_b = rel_param_err * realPrecision / block_size; + precision_c = rel_param_err * realPrecision / block_size; + precision_d = rel_param_err * realPrecision; + + if(exe_params->optQuantMode==1) + { + quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); + if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; + updateQuantizationInfo(quantization_intervals); + } + else{ + quantization_intervals = exe_params->intvCapacity; + } + + float mean = 0; + if(use_mean){ + // compute mean + double sum = 0.0; + size_t mean_count = 0; + for(size_t i=0; i<num_elements; i++){ + if(fabs(oriData[i] - dense_pos) < realPrecision){ + sum += oriData[i]; + mean_count ++; + } + } + if(mean_count > 0) mean = sum / mean_count; + } + + double tmp_realPrecision = realPrecision; + + // use two prediction buffers for higher performance + float * unpredictable_data = result_unpredictable_data; + unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); + memset(indicator, 0, num_blocks * sizeof(unsigned char)); + size_t reg_count = 0; + unsigned char * indicator_pos = indicator; + + int intvCapacity = exe_params->intvCapacity; + int intvRadius = exe_params->intvRadius; + int use_reg = 0; + float noise = realPrecision * 1.22; + + reg_params_pos = reg_params; + // compress the regression coefficients on the fly + float last_coeffcients[4] = {0.0}; + int coeff_intvCapacity_sz = 65536; + int coeff_intvRadius = coeff_intvCapacity_sz / 2; + int * coeff_type[4]; + int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int)); + float * coeff_unpred_data[4]; + float * coeff_unpredictable_data = (float *) malloc(num_blocks*4*sizeof(float)); + double precision[4]; + precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d; + for(int i=0; i<4; i++){ + coeff_type[i] = coeff_result_type + i * num_blocks; + coeff_unpred_data[i] 
= coeff_unpredictable_data + i * num_blocks; + } + int coeff_index = 0; + unsigned int coeff_unpredictable_count[4] = {0}; + + memset(pred_buffer, 0, (block_size+1)*(block_size+1)*(block_size+1)*sizeof(float)); + int pred_buffer_block_size = block_size + 1; + int strip_dim0_offset = pred_buffer_block_size * pred_buffer_block_size; + int strip_dim1_offset = pred_buffer_block_size; + + if(use_mean){ + int intvCapacity_sz = intvCapacity - 2; + type = result_type; + for(size_t i=0; i<num_x; i++){ + for(size_t j=0; j<num_y; j++){ + for(size_t k=0; k<num_z; k++){ + data_pos = oriData + i*block_size * dim0_offset + j*block_size * dim1_offset + k*block_size; + // add 1 in x, y, z offset + pred_buffer_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; + block_data_pos_x = data_pos; + for(int ii=0; ii<block_size; ii++){ + block_data_pos_y = block_data_pos_x; + for(int jj=0; jj<block_size; jj++){ + block_data_pos_z = block_data_pos_y; + for(int kk=0; kk<block_size; kk++){ + *pred_buffer_pos = *block_data_pos_z; + if(k*block_size + kk + 1< r3) block_data_pos_z ++; + pred_buffer_pos ++; + } + // add 1 in z offset + pred_buffer_pos ++; + if(j*block_size + jj + 1< r2) block_data_pos_y += dim1_offset; + } + // add 1 in y offset + pred_buffer_pos += pred_buffer_block_size; + if(i*block_size + ii + 1< r1) block_data_pos_x += dim0_offset; + } + /*sampling and decide which predictor*/ + { + // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4] + float * cur_data_pos; + float curData; + float pred_reg, pred_sz; + float err_sz = 0.0, err_reg = 0.0; + int bmi = 0; + for(int i=2; i<=block_size; i++){ + cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + + bmi = block_size - i; + cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + + cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; + pred_reg = 
reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + + cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); + err_reg += fabs(pred_reg - curData); + } + + use_reg = (err_reg < err_sz); + } + if(use_reg){ + { + /*predict coefficients in current block via previous reg_block*/ + float cur_coeff; + double diff, itvNum; + for(int e=0; e<4; e++){ + cur_coeff = reg_params_pos[e*num_blocks]; + diff = cur_coeff - last_coeffcients[e]; + itvNum = fabs(diff)/precision[e] + 1; + if (itvNum < coeff_intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; + last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + float curData; + float pred; + double itvNum; + double diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; + for(size_t ii=0; ii<block_size; ii++){ + for(size_t jj=0; jj<block_size; jj++){ + for(size_t kk=0; kk<block_size; kk++){ + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; + diff = curData - pred; + itvNum = fabs(diff)/tmp_realPrecision + 1; + if (itvNum < intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>tmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + cur_data_pos ++; + } + cur_data_pos += pred_buffer_block_size; + } + + total_unpred += block_unpredictable_count; + unpredictable_data += block_unpredictable_count; + reg_count ++; + } + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; + float curData; + float pred3D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; 
ii<block_size; ii++){ + for(size_t jj=0; jj<block_size; jj++){ + for(size_t kk=0; kk<block_size; kk++){ + + curData = *cur_data_pos; + if(fabs(curData - mean) <= realPrecision){ + type[index] = 1; + *cur_data_pos = mean; + } + else + { + pred3D = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] + - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; + diff = curData - pred3D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + *cur_data_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - *cur_data_pos)>tmp_realPrecision){ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + index ++; + cur_data_pos ++; + } + cur_data_pos ++; + } + cur_data_pos += pred_buffer_block_size; + } + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[k] = 1; + }// end SZ + reg_params_pos ++; + type += block_size * block_size * block_size; + } // end k + indicator_pos += num_z; + }// end j + }// end i + } + else{ + int intvCapacity_sz = intvCapacity - 2; + type = result_type; + for(size_t i=0; i<num_x; i++){ + for(size_t j=0; j<num_y; j++){ + for(size_t k=0; k<num_z; k++){ + data_pos = oriData + i*block_size * dim0_offset + j*block_size * dim1_offset + k*block_size; + // add 1 in x, y, z offset + pred_buffer_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; + block_data_pos_x = data_pos; + for(int ii=0; ii<block_size; ii++){ + block_data_pos_y = block_data_pos_x; + for(int jj=0; jj<block_size; jj++){ + block_data_pos_z = block_data_pos_y; + for(int kk=0; kk<block_size; kk++){ + *pred_buffer_pos = *block_data_pos_z; + if(k*block_size + kk < r3) block_data_pos_z ++; + pred_buffer_pos ++; + } + // add 1 in z offset + pred_buffer_pos ++; + if(j*block_size + jj < r2) block_data_pos_y += dim1_offset; + } + // add 1 in y offset + pred_buffer_pos += pred_buffer_block_size; + if(i*block_size + ii < r1) block_data_pos_x += dim0_offset; + } + /*sampling*/ + { + // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4] + float * cur_data_pos; + float curData; + float pred_reg, pred_sz; + float err_sz = 0.0, err_reg = 0.0; + int bmi; + for(int i=2; i<=block_size; i++){ + cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + + bmi = block_size - i; + cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + 
i*pred_buffer_block_size + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + + cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + i; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + + cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + bmi; + curData = *cur_data_pos; + pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; + pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; + err_sz += fabs(pred_sz - curData) + noise; + err_reg += fabs(pred_reg - curData); + } + + use_reg = (err_reg < err_sz); + + } + if(use_reg) + { + { + /*predict coefficients in current block via previous reg_block*/ + float cur_coeff; + double diff, itvNum; + for(int e=0; e<4; e++){ + cur_coeff = reg_params_pos[e*num_blocks]; + diff = cur_coeff - last_coeffcients[e]; + itvNum = fabs(diff)/precision[e] + 1; + if (itvNum < coeff_intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; + last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + else{ + coeff_type[e][coeff_index] = 0; + last_coeffcients[e] = cur_coeff; + coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; + } + } + coeff_index ++; + } + float curData; + float pred; + double itvNum; + double diff; + size_t index = 0; + size_t block_unpredictable_count = 0; + float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; + for(size_t ii=0; ii<block_size; ii++){ + for(size_t jj=0; jj<block_size; jj++){ + for(size_t kk=0; kk<block_size; kk++){ + curData = *cur_data_pos; + pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; + diff = curData - pred; + itvNum = fabs(diff)/tmp_realPrecision + 1; + if (itvNum < intvCapacity){ + if 
(diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - pred)>tmp_realPrecision){ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + pred = curData; + unpredictable_data[block_unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + cur_data_pos ++; + } + cur_data_pos += pred_buffer_block_size; + } + total_unpred += block_unpredictable_count; + unpredictable_data += block_unpredictable_count; + reg_count ++; + } + else{ + // use SZ + // SZ predication + unpredictable_count = 0; + float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; + float curData; + float pred3D; + double itvNum, diff; + size_t index = 0; + for(size_t ii=0; ii<block_size; ii++){ + for(size_t jj=0; jj<block_size; jj++){ + for(size_t kk=0; kk<block_size; kk++){ + curData = *cur_data_pos; + pred3D = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] + - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; + diff = curData - pred3D; + itvNum = fabs(diff)/realPrecision + 1; + if (itvNum < intvCapacity_sz){ + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + intvRadius; + *cur_data_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData - *cur_data_pos)>tmp_realPrecision){ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[index] = 0; + *cur_data_pos = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + index ++; + cur_data_pos ++; + } + cur_data_pos ++; + } + cur_data_pos += pred_buffer_block_size; + } + total_unpred += unpredictable_count; + unpredictable_data += unpredictable_count; + // change indicator + indicator_pos[k] = 1; + }// end SZ + reg_params_pos ++; + type += block_size * block_size * block_size; + } + indicator_pos += num_z; + } + } + } + free(pred_buffer); + int stateNum = 2*quantization_intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + size_t nodeCount = 0; + init(huffmanTree, result_type, num_blocks*max_num_block_elements); + size_t i = 0; + for (i = 0; i < huffmanTree->stateNum; i++) + if (huffmanTree->code[i]) nodeCount++; + nodeCount = nodeCount*2-1; + + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + + unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; + // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements + unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1); + unsigned char * result_pos = result; + initRandomAccessBytes(result_pos); + + result_pos += meta_data_offset; + + sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8 + 
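The per-block coefficient fit computed earlier in this function accumulates the sums f, fx, fy and fz and then applies a closed-form least-squares solution for value ~ a*i + b*j + c*k + d over the uniform block grid. A self-contained restatement of that formula, assuming a dense m*m*m block in row-major order with m >= 2 (block_size is 6 here):

#include <stddef.h>

/* Hedged sketch: closed-form least-squares fit of f(i,j,k) ~ a*i + b*j + c*k + d
 * over an m*m*m block (i slowest, k fastest), mirroring the fx/fy/fz/f
 * accumulation done per block above. */
static void fit_block_plane(const float *block, int m, float out[4])
{
    double f = 0.0, fx = 0.0, fy = 0.0, fz = 0.0;
    const float *p = block;
    for (int i = 0; i < m; i++)
        for (int j = 0; j < m; j++)
            for (int k = 0; k < m; k++) {
                double v = *p++;
                f  += v;
                fx += v * i;
                fy += v * j;
                fz += v * k;
            }
    double inv = 1.0 / ((double)m * m * m);
    out[0] = (float)((2.0 * fx / (m - 1) - f) * 6.0 * inv / (m + 1));      /* a */
    out[1] = (float)((2.0 * fy / (m - 1) - f) * 6.0 * inv / (m + 1));      /* b */
    out[2] = (float)((2.0 * fz / (m - 1) - f) * 6.0 * inv / (m + 1));      /* c */
    out[3] = (float)(f * inv - 0.5 * (m - 1) * (out[0] + out[1] + out[2])); /* d */
}

The coefficients are themselves quantized with per-coefficient bounds of rel_param_err * realPrecision / block_size (and rel_param_err * realPrecision for d), so their storage cost stays small next to the data.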
result_pos += exe_params->SZ_SIZE_TYPE;
+
+	intToBytes_bigEndian(result_pos, block_size);
+	result_pos += sizeof(int);
+	doubleToBytes(result_pos, realPrecision);
+	result_pos += sizeof(double);
+	intToBytes_bigEndian(result_pos, quantization_intervals);
+	result_pos += sizeof(int);
+	intToBytes_bigEndian(result_pos, treeByteSize);
+	result_pos += sizeof(int);
+	intToBytes_bigEndian(result_pos, nodeCount);
+	result_pos += sizeof(int);
+	memcpy(result_pos, treeBytes, treeByteSize);
+	result_pos += treeByteSize;
+	free(treeBytes);
+
+	memcpy(result_pos, &use_mean, sizeof(unsigned char));
+	result_pos += sizeof(unsigned char);
+	memcpy(result_pos, &mean, sizeof(float));
+	result_pos += sizeof(float);
+	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
+	result_pos += indicator_size;
+
+	//convert the lead/mid/resi to byte stream
+	if(reg_count > 0){
+		for(int e=0; e<4; e++){
+			int stateNum = 2*coeff_intvCapacity_sz;
+			HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+			size_t nodeCount = 0;
+			init(huffmanTree, coeff_type[e], reg_count);
+			size_t i = 0;
+			for (i = 0; i < huffmanTree->stateNum; i++)
+				if (huffmanTree->code[i]) nodeCount++;
+			nodeCount = nodeCount*2-1;
+			unsigned char *treeBytes;
+			unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
+			doubleToBytes(result_pos, precision[e]);
+			result_pos += sizeof(double);
+			intToBytes_bigEndian(result_pos, coeff_intvRadius);
+			result_pos += sizeof(int);
+			intToBytes_bigEndian(result_pos, treeByteSize);
+			result_pos += sizeof(int);
+			intToBytes_bigEndian(result_pos, nodeCount);
+			result_pos += sizeof(int);
+			memcpy(result_pos, treeBytes, treeByteSize);
+			result_pos += treeByteSize;
+			free(treeBytes);
+			size_t typeArray_size = 0;
+			encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
+			sizeToBytes(result_pos, typeArray_size);
+			result_pos += sizeof(size_t) + typeArray_size;
+			intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
+			result_pos += sizeof(int);
+			memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float));
+			result_pos += coeff_unpredictable_count[e]*sizeof(float);
+			SZ_ReleaseHuffman(huffmanTree);
+		}
+	}
+	free(coeff_result_type);
+	free(coeff_unpredictable_data);
+
+	//record the number of unpredictable data and also store them
+	memcpy(result_pos, &total_unpred, sizeof(size_t));
+	result_pos += sizeof(size_t);
+	memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float));
+	result_pos += total_unpred * sizeof(float);
+	size_t typeArray_size = 0;
+	encode(huffmanTree, result_type, num_blocks*max_num_block_elements, result_pos, &typeArray_size);
+	result_pos += typeArray_size;
+	size_t totalEncodeSize = result_pos - result;
+	free(indicator);
+	free(result_unpredictable_data);
+	free(result_type);
+	free(reg_params);
+
+
+	SZ_ReleaseHuffman(huffmanTree);
+	*comp_size = totalEncodeSize;
+	return result;
+}
diff --git a/thirdparty/SZ/sz/src/sz_float_pwr.c b/thirdparty/SZ/sz/src/sz_float_pwr.c
index 644afdd..92a449f 100644
--- a/thirdparty/SZ/sz/src/sz_float_pwr.c
+++ b/thirdparty/SZ/sz/src/sz_float_pwr.c
@@ -23,6 +23,7 @@
 #include "sz_float_pwr.h"
 #include "zlib.h"
 #include "rw.h"
+#include "utility.h"
 
 void compute_segment_precisions_float_1D(float *oriData, size_t dataLength, float* pwrErrBound, unsigned char* pwrErrBoundBytes, double globalPrecision)
 {
@@ -1780,3 +1781,190 @@ size_t dataLength, double absErrBound, double relBoundRatio, double pwrErrRatio,
 	free_TightDataPointStorageF(tdps);
 }
+
+#include <stdbool.h>
+
+void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t dataLength, size_t *outSize, float min, float max){
+
+	float * log_data = (float *) malloc(dataLength * sizeof(float));
+
+	unsigned char * signs = (unsigned char *) malloc(dataLength);
+	memset(signs, 0, dataLength);
+	// preprocess
+	float max_abs_log_data;
+	if(min == 0) max_abs_log_data = fabs(log2(fabs(max)));
+	else if(max == 0) max_abs_log_data = fabs(log2(fabs(min)));
+	else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max)));
+	float min_log_data = max_abs_log_data;
+	bool positive = true;
+	for(size_t i=0; i<dataLength; i++){
+		if(oriData[i] < 0){
+			signs[i] = 1;
+			log_data[i] = -oriData[i];
+			positive = false;
+		}
+		else
+			log_data[i] = oriData[i];
+		if(log_data[i] > 0){
+			log_data[i] = log2(log_data[i]);
+			if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i];
+			if(log_data[i] < min_log_data) min_log_data = log_data[i];
+		}
+	}
+
+	float valueRangeSize, medianValue_f;
+	computeRangeSize_float(log_data, dataLength, &valueRangeSize, &medianValue_f);
+	if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data);
+	double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 1.2e-7;
+	for(size_t i=0; i<dataLength; i++){
+		if(oriData[i] == 0){
+			log_data[i] = min_log_data - 2.0001*realPrecision;
+		}
+	}
+
+	TightDataPointStorageF* tdps = SZ_compress_float_1D_MDQ(log_data, dataLength, realPrecision, valueRangeSize, medianValue_f);
+	tdps->minLogValue = min_log_data - 1.0001*realPrecision;
+	free(log_data);
+	if(!positive){
+		unsigned char * comp_signs;
+		// compress signs
+		unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs);
+		tdps->pwrErrBoundBytes = comp_signs;
+		tdps->pwrErrBoundBytes_size = signSize;
+	}
+	else{
+		tdps->pwrErrBoundBytes = NULL;
+		tdps->pwrErrBoundBytes_size = 0;
+	}
+	free(signs);
+
+	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
+	if(*outSize>dataLength*sizeof(float))
+		SZ_compress_args_float_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize);
+
+	free_TightDataPointStorageF(tdps);
+}
+
+void SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t *outSize, float min, float max){
+
+	size_t dataLength = r1 * r2;
+	float * log_data = (float *) malloc(dataLength * sizeof(float));
+
+	unsigned char * signs = (unsigned char *) malloc(dataLength);
+	memset(signs, 0, dataLength);
+	// preprocess
+	float max_abs_log_data;
+	if(min == 0) max_abs_log_data = fabs(log2(fabs(max)));
+	else if(max == 0) max_abs_log_data = fabs(log2(fabs(min)));
+	else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max)));
+	float min_log_data = max_abs_log_data;
+	bool positive = true;
+	for(size_t i=0; i<dataLength; i++){
+		if(oriData[i] < 0){
+			signs[i] = 1;
+			log_data[i] = -oriData[i];
+			positive = false;
+		}
+		else
+			log_data[i] = oriData[i];
+		if(log_data[i] > 0){
+			log_data[i] = log2(log_data[i]);
+			if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i];
+			if(log_data[i] < min_log_data) min_log_data = log_data[i];
+		}
+	}
+
+	float valueRangeSize, medianValue_f;
+	computeRangeSize_float(log_data, dataLength, &valueRangeSize, &medianValue_f);
+	if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data);
+	double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 1.2e-7;
+	for(size_t i=0; i<dataLength; i++){
+		if(oriData[i] == 0){
+			log_data[i] = min_log_data - 2.0001*realPrecision;
+		}
+	}
+
+	TightDataPointStorageF* tdps = SZ_compress_float_2D_MDQ(log_data, r1, r2, realPrecision, valueRangeSize, medianValue_f);
+	tdps->minLogValue = min_log_data - 1.0001*realPrecision;
+	free(log_data);
+	if(!positive){
+		unsigned char * comp_signs;
+		// compress signs
+		unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs);
+		tdps->pwrErrBoundBytes = comp_signs;
+		tdps->pwrErrBoundBytes_size = signSize;
+	}
+	else{
+		tdps->pwrErrBoundBytes = NULL;
+		tdps->pwrErrBoundBytes_size = 0;
+	}
+	free(signs);
+
+	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
+	if(*outSize>dataLength*sizeof(float))
+		SZ_compress_args_float_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize);
+
+	free_TightDataPointStorageF(tdps);
+}
+
+void SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max){
+
+	size_t dataLength = r1 * r2 * r3;
+	float * log_data = (float *) malloc(dataLength * sizeof(float));
+
+	unsigned char * signs = (unsigned char *) malloc(dataLength);
+	memset(signs, 0, dataLength);
+	// preprocess
+	float max_abs_log_data;
+	if(min == 0) max_abs_log_data = fabs(log2(fabs(max)));
+	else if(max == 0) max_abs_log_data = fabs(log2(fabs(min)));
+	else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max)));
+	float min_log_data = max_abs_log_data;
+	bool positive = true;
+	for(size_t i=0; i<dataLength; i++){
+		if(oriData[i] < 0){
+			signs[i] = 1;
+			log_data[i] = -oriData[i];
+			positive = false;
+		}
+		else
+			log_data[i] = oriData[i];
+		if(log_data[i] > 0){
+			log_data[i] = log2(log_data[i]);
+			if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i];
+			if(log_data[i] < min_log_data) min_log_data = log_data[i];
+		}
+	}
+
+	float valueRangeSize, medianValue_f;
+	computeRangeSize_float(log_data, dataLength, &valueRangeSize, &medianValue_f);
+	if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data);
+	double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 1.2e-7;
+	for(size_t i=0; i<dataLength; i++){
+		if(oriData[i] == 0){
+			log_data[i] = min_log_data - 2.0001*realPrecision;
+		}
+	}
+
+	TightDataPointStorageF* tdps = SZ_compress_float_3D_MDQ(log_data, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f);
+	tdps->minLogValue = min_log_data - 1.0001*realPrecision;
+	free(log_data);
+	if(!positive){
+		unsigned char * comp_signs;
+		// compress signs
+		unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs);
+		tdps->pwrErrBoundBytes = comp_signs;
+		tdps->pwrErrBoundBytes_size = signSize;
+	}
+	else{
+		tdps->pwrErrBoundBytes = NULL;
+		tdps->pwrErrBoundBytes_size = 0;
+	}
+	free(signs);
+
+	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
+	if(*outSize>dataLength*sizeof(float))
+		SZ_compress_args_float_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize);
+
+	free_TightDataPointStorageF(tdps);
+}
diff --git a/thirdparty/SZ/sz/src/sz_int16.c b/thirdparty/SZ/sz/src/sz_int16.c
index fc91dd1..8226234 100644
--- a/thirdparty/SZ/sz/src/sz_int16.c
+++ b/thirdparty/SZ/sz/src/sz_int16.c
@@ -21,6 +21,7 @@
 #include "rw.h"
 #include "TightDataPointStorageI.h"
 #include "sz_int16.h"
+#include "utility.h"
 
 unsigned int optimize_intervals_int16_1D(int16_t *oriData, size_t dataLength, double realPrecision)
 {
@@ -266,7 +267,7 @@ TightDataPointStorageI* SZ_compress_int16_1D_MDQ(int16_t *oriData, size_t dataLe
 			//pred = 2*last3CmprsData[0] - last3CmprsData[1];
 			pred = last3CmprsData[0];
 			predAbsErr = llabs(curData - pred);
-			if(predAbsErr<=checkRadius)
+
if(predAbsErr<checkRadius) { state = (predAbsErr/realPrecision+1)/2; if(curData>=pred) @@ -1253,7 +1254,7 @@ int errBoundMode, double absErr_Bound, double relBoundRatio) } else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) { - *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode); + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); free(tmpByteData); } else diff --git a/thirdparty/SZ/sz/src/sz_int64.c b/thirdparty/SZ/sz/src/sz_int64.c index eb97377..065fb16 100644 --- a/thirdparty/SZ/sz/src/sz_int64.c +++ b/thirdparty/SZ/sz/src/sz_int64.c @@ -21,6 +21,7 @@ #include "rw.h" #include "TightDataPointStorageI.h" #include "sz_int64.h" +#include "utility.h" unsigned int optimize_intervals_int64_1D(int64_t *oriData, size_t dataLength, double realPrecision) { @@ -269,7 +270,7 @@ TightDataPointStorageI* SZ_compress_int64_1D_MDQ(int64_t *oriData, size_t dataLe //pred = 2*last3CmprsData[0] - last3CmprsData[1]; pred = last3CmprsData[0]; predAbsErr = llabs(curData - pred); - if(predAbsErr<=checkRadius) + if(predAbsErr<checkRadius) { state = (predAbsErr/realPrecision+1)/2; if(curData>=pred) @@ -1254,7 +1255,7 @@ int errBoundMode, double absErr_Bound, double relBoundRatio) } else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) { - *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode); + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); free(tmpByteData); } else diff --git a/thirdparty/SZ/sz/src/sz_int8.c b/thirdparty/SZ/sz/src/sz_int8.c index c869681..83febd0 100644 --- a/thirdparty/SZ/sz/src/sz_int8.c +++ b/thirdparty/SZ/sz/src/sz_int8.c @@ -21,6 +21,7 @@ #include "rw.h" #include "TightDataPointStorageI.h" #include "sz_int8.h" +#include "utility.h" unsigned int optimize_intervals_int8_1D(int8_t *oriData, size_t dataLength, double realPrecision) { @@ -266,7 +267,7 @@ TightDataPointStorageI* SZ_compress_int8_1D_MDQ(int8_t *oriData, size_t dataLeng //pred = 2*last3CmprsData[0] - last3CmprsData[1]; pred = last3CmprsData[0]; predAbsErr = llabs(curData - pred); - if(predAbsErr<=checkRadius) + if(predAbsErr<checkRadius) { state = (predAbsErr/realPrecision+1)/2; if(curData>=pred) @@ -1370,7 +1371,7 @@ int errBoundMode, double absErr_Bound, double relBoundRatio) } else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) { - *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode); + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); free(tmpByteData); } else diff --git a/thirdparty/SZ/sz/src/sz_uint16.c b/thirdparty/SZ/sz/src/sz_uint16.c index 4200b31..b55f007 100644 --- a/thirdparty/SZ/sz/src/sz_uint16.c +++ b/thirdparty/SZ/sz/src/sz_uint16.c @@ -21,6 +21,7 @@ #include "rw.h" #include "TightDataPointStorageI.h" #include "sz_uint16.h" +#include "utility.h" unsigned int optimize_intervals_uint16_1D(uint16_t *oriData, size_t dataLength, double realPrecision) { @@ -266,7 +267,7 @@ TightDataPointStorageI* SZ_compress_uint16_1D_MDQ(uint16_t *oriData, size_t data //pred = 2*last3CmprsData[0] - last3CmprsData[1]; pred = last3CmprsData[0]; predAbsErr = llabs(curData - pred); - if(predAbsErr<=checkRadius) + 
if(predAbsErr<checkRadius) { state = (predAbsErr/realPrecision+1)/2; if(curData>=pred) @@ -1369,7 +1370,7 @@ int errBoundMode, double absErr_Bound, double relBoundRatio) } else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) { - *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode); + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); free(tmpByteData); } else diff --git a/thirdparty/SZ/sz/src/sz_uint32.c b/thirdparty/SZ/sz/src/sz_uint32.c index 29d5961..6f27510 100644 --- a/thirdparty/SZ/sz/src/sz_uint32.c +++ b/thirdparty/SZ/sz/src/sz_uint32.c @@ -21,6 +21,7 @@ #include "rw.h" #include "TightDataPointStorageI.h" #include "sz_uint32.h" +#include "utility.h" unsigned int optimize_intervals_uint32_1D(uint32_t *oriData, size_t dataLength, double realPrecision) { @@ -268,7 +269,7 @@ TightDataPointStorageI* SZ_compress_uint32_1D_MDQ(uint32_t *oriData, size_t data //pred = 2*last3CmprsData[0] - last3CmprsData[1]; pred = last3CmprsData[0]; predAbsErr = llabs(curData - pred); - if(predAbsErr<=checkRadius) + if(predAbsErr<checkRadius) { state = (predAbsErr/realPrecision+1)/2; if(curData>=pred) @@ -1253,7 +1254,7 @@ int errBoundMode, double absErr_Bound, double relBoundRatio) } else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) { - *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode); + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); free(tmpByteData); } else diff --git a/thirdparty/SZ/sz/src/sz_uint64.c b/thirdparty/SZ/sz/src/sz_uint64.c index b8cb8bc..7d2eca8 100644 --- a/thirdparty/SZ/sz/src/sz_uint64.c +++ b/thirdparty/SZ/sz/src/sz_uint64.c @@ -21,6 +21,7 @@ #include "rw.h" #include "TightDataPointStorageI.h" #include "sz_uint64.h" +#include "utility.h" unsigned int optimize_intervals_uint64_1D(uint64_t *oriData, size_t dataLength, double realPrecision) { @@ -268,7 +269,7 @@ TightDataPointStorageI* SZ_compress_uint64_1D_MDQ(uint64_t *oriData, size_t data //pred = 2*last3CmprsData[0] - last3CmprsData[1]; pred = last3CmprsData[0]; predAbsErr = llabs(curData - pred); - if(predAbsErr<=checkRadius) + if(predAbsErr<checkRadius) { state = (predAbsErr/realPrecision+1)/2; if(curData>=pred) @@ -1253,7 +1254,7 @@ int errBoundMode, double absErr_Bound, double relBoundRatio) } else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) { - *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode); + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); free(tmpByteData); } else diff --git a/thirdparty/SZ/sz/src/sz_uint8.c b/thirdparty/SZ/sz/src/sz_uint8.c index 6ca4ae4..6865564 100644 --- a/thirdparty/SZ/sz/src/sz_uint8.c +++ b/thirdparty/SZ/sz/src/sz_uint8.c @@ -21,6 +21,7 @@ #include "rw.h" #include "TightDataPointStorageI.h" #include "sz_uint8.h" +#include "utility.h" unsigned int optimize_intervals_uint8_1D(uint8_t *oriData, size_t dataLength, double realPrecision) { @@ -266,7 +267,7 @@ TightDataPointStorageI* SZ_compress_uint8_1D_MDQ(uint8_t *oriData, size_t dataLe //pred = 2*last3CmprsData[0] - last3CmprsData[1]; pred = last3CmprsData[0]; predAbsErr = llabs(curData - pred); - if(predAbsErr<=checkRadius) + 
if(predAbsErr<checkRadius) { state = (predAbsErr/realPrecision+1)/2; if(curData>=pred) @@ -1370,7 +1371,7 @@ int errBoundMode, double absErr_Bound, double relBoundRatio) } else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) { - *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode); + *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); free(tmpByteData); } else diff --git a/thirdparty/SZ/sz/src/szd_double.c b/thirdparty/SZ/sz/src/szd_double.c index 1440e2d..09d5851 100644 --- a/thirdparty/SZ/sz/src/szd_double.c +++ b/thirdparty/SZ/sz/src/szd_double.c @@ -16,6 +16,7 @@ #include "Huffman.h" #include "szd_double_pwr.h" #include "szd_double_ts.h" +#include "utility.h" int SZ_decompress_args_double(double** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize) { @@ -29,10 +30,10 @@ int SZ_decompress_args_double(double** newData, size_t r5, size_t r4, size_t r3, unsigned char* szTmpBytes; if(cmpSize!=12+4+MetaDataByteLength && cmpSize!=12+8+MetaDataByteLength) { - int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]); + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); if(confparams_dec->szMode!=SZ_TEMPORAL_COMPRESSION) { - if(isZlib) + if(confparams_dec->losslessCompressor!=-1) confparams_dec->szMode = SZ_BEST_COMPRESSION; else confparams_dec->szMode = SZ_BEST_SPEED; @@ -46,7 +47,7 @@ int SZ_decompress_args_double(double** newData, size_t r5, size_t r4, size_t r3, { if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; - tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE); + tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE); //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); //memcpy(szTmpBytes, tmpBytes, tmpSize); //free(tmpBytes); //release useless memory @@ -80,22 +81,45 @@ int SZ_decompress_args_double(double** newData, size_t r5, size_t r4, size_t r3, (*newData)[i] = bytesToDouble(p); } } - else if (dim == 1) - getSnapshotData_double_1D(newData,r1,tdps, errBoundMode); - else - if (dim == 2) - getSnapshotData_double_2D(newData,r2,r1,tdps, errBoundMode); - else - if (dim == 3) - getSnapshotData_double_3D(newData,r3,r2,r1,tdps, errBoundMode); - else - if (dim == 4) - getSnapshotData_double_4D(newData,r4,r3,r2,r1,tdps, errBoundMode); - else + else { - printf("Error: currently support only at most 4 dimensions!\n"); - status = SZ_DERR; - } + if(tdps->raBytes_size > 0) //v2.0 + { + if (dim == 1) + getSnapshotData_double_1D(newData,r1,tdps, errBoundMode); + else if(dim == 2) + decompressDataSeries_double_2D_nonblocked_with_blocked_regression(newData, r2, r1, tdps->raBytes); + else if(dim == 3) + decompressDataSeries_double_3D_nonblocked_with_blocked_regression(newData, r3, r2, r1, tdps->raBytes); + else if(dim == 4) + decompressDataSeries_double_3D_nonblocked_with_blocked_regression(newData, r4*r3, r2, r1, tdps->raBytes); + else + { + printf("Error: currently support only at most 4 dimensions!\n"); + status = SZ_DERR; + } + } + else //1.4.13 + { + if (dim == 1) + 
getSnapshotData_double_1D(newData,r1,tdps, errBoundMode); + else + if (dim == 2) + getSnapshotData_double_2D(newData,r2,r1,tdps, errBoundMode); + else + if (dim == 3) + getSnapshotData_double_3D(newData,r3,r2,r1,tdps, errBoundMode); + else + if (dim == 4) + getSnapshotData_double_4D(newData,r4,r3,r2,r1,tdps, errBoundMode); + else + { + printf("Error: currently support only at most 4 dimensions!\n"); + status = SZ_DERR; + } + } + } + free_TightDataPointStorageD2(tdps); if(confparams_dec->szMode!=SZ_BEST_SPEED && cmpSize!=12+MetaDataByteLength+exe_params->SZ_SIZE_TYPE) free(szTmpBytes); @@ -1647,8 +1671,8 @@ void getSnapshotData_double_1D(double** data, size_t dataSeriesLength, TightData } else { - //decompressDataSeries_double_1D_pwr(data, dataSeriesLength, tdps); - decompressDataSeries_double_1D_pwrgroup(data, dataSeriesLength, tdps); + decompressDataSeries_double_1D_pwr_pre_log(data, dataSeriesLength, tdps); + //decompressDataSeries_double_1D_pwrgroup(data, dataSeriesLength, tdps); } return; } else { @@ -1671,7 +1695,8 @@ void getSnapshotData_double_1D(double** data, size_t dataSeriesLength, TightData if(errBoundMode < PW_REL) decompressDataSeries_double_1D(&decmpData, dataSeriesLength, tdps); else - decompressDataSeries_double_1D_pwr(&decmpData, dataSeriesLength, tdps); + //decompressDataSeries_double_1D_pwr(&decmpData, dataSeriesLength, tdps); + decompressDataSeries_double_1D_pwr_pre_log(&decmpData, dataSeriesLength, tdps); // insert the decompressed data size_t k = 0; for (i = 0; i < dataSeriesLength; i++) { @@ -1711,7 +1736,8 @@ void getSnapshotData_double_2D(double** data, size_t r1, size_t r2, TightDataPoi decompressDataSeries_double_2D(data, r1, r2, tdps); } else - decompressDataSeries_double_2D_pwr(data, r1, r2, tdps); + //decompressDataSeries_double_2D_pwr(data, r1, r2, tdps); + decompressDataSeries_double_2D_pwr_pre_log(data, r1, r2, tdps); return; } else { *data = (double*)malloc(sizeof(double)*dataSeriesLength); @@ -1733,7 +1759,8 @@ void getSnapshotData_double_2D(double** data, size_t r1, size_t r2, TightDataPoi if(errBoundMode < PW_REL) decompressDataSeries_double_2D(&decmpData, r1, r2, tdps); else - decompressDataSeries_double_2D_pwr(&decmpData, r1, r2, tdps); + //decompressDataSeries_double_2D_pwr(&decmpData, r1, r2, tdps); + decompressDataSeries_double_2D_pwr_pre_log(&decmpData, r1, r2, tdps); // insert the decompressed data size_t k = 0; for (i = 0; i < dataSeriesLength; i++) { @@ -1773,7 +1800,8 @@ void getSnapshotData_double_3D(double** data, size_t r1, size_t r2, size_t r3, T decompressDataSeries_double_3D(data, r1, r2, r3, tdps); } else - decompressDataSeries_double_3D_pwr(data, r1, r2, r3, tdps); + //decompressDataSeries_double_3D_pwr(data, r1, r2, r3, tdps); + decompressDataSeries_double_3D_pwr_pre_log(data, r1, r2, r3, tdps); return; } else { *data = (double*)malloc(sizeof(double)*dataSeriesLength); @@ -1795,7 +1823,8 @@ void getSnapshotData_double_3D(double** data, size_t r1, size_t r2, size_t r3, T if(errBoundMode < PW_REL) decompressDataSeries_double_3D(&decmpData, r1, r2, r3, tdps); else - decompressDataSeries_double_3D_pwr(&decmpData, r1, r2, r3, tdps); + //decompressDataSeries_double_3D_pwr(&decmpData, r1, r2, r3, tdps); + decompressDataSeries_double_3D_pwr_pre_log(&decmpData, r1, r2, r3, tdps); // insert the decompressed data size_t k = 0; for (i = 0; i < dataSeriesLength; i++) { @@ -1836,7 +1865,8 @@ void getSnapshotData_double_4D(double** data, size_t r1, size_t r2, size_t r3, s } else { - decompressDataSeries_double_3D_pwr(data, r1*r2, r3, r4, tdps); + 
//decompressDataSeries_double_3D_pwr(data, r1*r2, r3, r4, tdps); + decompressDataSeries_double_3D_pwr_pre_log(data, r1*r2, r3, r4, tdps); //ToDO //decompressDataSeries_double_4D_pwr(data, r1, r2, r3, r4, tdps); } @@ -1858,7 +1888,8 @@ void getSnapshotData_double_4D(double** data, size_t r1, size_t r2, size_t r3, s if(errBoundMode < PW_REL) decompressDataSeries_double_4D(&decmpData, r1, r2, r3, r4, tdps); else - decompressDataSeries_double_3D_pwr(&decmpData, r1*r2, r3, r4, tdps); + //decompressDataSeries_double_3D_pwr(&decmpData, r1*r2, r3, r4, tdps); + decompressDataSeries_double_3D_pwr_pre_log(&decmpData, r1*r2, r3, r4, tdps); //ToDo //decompressDataSeries_double_4D_pwr(&decmpData, r1, r2, r3, r4, tdps); // insert the decompressed data @@ -1873,3 +1904,2719 @@ void getSnapshotData_double_4D(double** data, size_t r1, size_t r2, size_t r3, s } } } + +void decompressDataSeries_double_2D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, unsigned char* comp_data){ + + size_t dim0_offset = r2; + size_t num_elements = r1 * r2; + + *data = (double*)malloc(sizeof(double)*num_elements); + + unsigned char * comp_data_pos = comp_data; + + size_t block_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + // calculate block dims + size_t num_x, num_y; + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); + + size_t split_index_x, split_index_y; + size_t early_blockcount_x, early_blockcount_y; + size_t late_blockcount_x, late_blockcount_y; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + + size_t num_blocks = num_x * num_y; + + double realPrecision = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + unsigned int intervals = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + updateQuantizationInfo(intervals); + + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + double mean; + unsigned char use_mean; + memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); + comp_data_pos += sizeof(unsigned char); + memcpy(&mean, comp_data_pos, sizeof(double)); + comp_data_pos += sizeof(double); + size_t reg_count = 0; + + unsigned char * indicator; + size_t indicator_bitlength = (num_blocks - 1)/8 + 1; + convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); + comp_data_pos += indicator_bitlength; + for(size_t i=0; i<num_blocks; i++){ + if(!indicator[i]) reg_count ++; + } + //printf("reg_count: %ld\n", reg_count); + + int coeff_intvRadius[3]; + int * coeff_result_type = (int *) malloc(num_blocks*3*sizeof(int)); + int * coeff_type[3]; + double precision[3]; + double * coeff_unpred_data[3]; + if(reg_count > 0){ + for(int i=0; i<3; i++){ + precision[i] = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + int stateNum = 2*coeff_intvRadius[i]*2; + HuffmanTree* 
huffmanTree = createHuffmanTree(stateNum); + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + coeff_type[i] = coeff_result_type + i * num_blocks; + size_t typeArray_size = bytesToSize(comp_data_pos); + decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); + comp_data_pos += sizeof(size_t) + typeArray_size; + int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + coeff_unpred_data[i] = (double *) comp_data_pos; + comp_data_pos += coeff_unpred_count * sizeof(double); + SZ_ReleaseHuffman(huffmanTree); + } + } + double last_coefficients[3] = {0.0}; + int coeff_unpred_data_count[3] = {0}; + int coeff_index = 0; + updateQuantizationInfo(intervals); + + size_t total_unpred; + memcpy(&total_unpred, comp_data_pos, sizeof(size_t)); + comp_data_pos += sizeof(size_t); + double * unpred_data = (double *) comp_data_pos; + comp_data_pos += total_unpred * sizeof(double); + + int * result_type = (int *) malloc(num_elements * sizeof(int)); + decode(comp_data_pos, num_elements, root, result_type); + SZ_ReleaseHuffman(huffmanTree); + + int intvRadius = exe_params->intvRadius; + + int * type; + + double * data_pos = *data; + size_t offset_x, offset_y; + size_t current_blockcount_x, current_blockcount_y; + size_t cur_unpred_count; + + unsigned char * indicator_pos = indicator; + if(use_mean){ + type = result_type; + for(size_t i=0; i<num_x; i++){ + for(size_t j=0; j<num_y; j++){ + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + data_pos = *data + offset_x * dim0_offset + offset_y; + + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = (j < split_index_y) ? 
early_blockcount_y : late_blockcount_y; + + size_t current_block_elements = current_blockcount_x * current_blockcount_y; + if(*indicator_pos){ + // decompress by SZ + + double * block_data_pos = data_pos; + double pred; + size_t index = 0; + int type_; + // d11 is current data + size_t unpredictable_count = 0; + double d00, d01, d10; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + d00 = d01 = d10 = 1; + if(i == 0 && ii == 0){ + d00 = d01 = 0; + } + if(j == 0 && jj == 0){ + d00 = d10 = 0; + } + if(d00){ + d00 = block_data_pos[- dim0_offset - 1]; + } + if(d01){ + d01 = block_data_pos[- dim0_offset]; + } + if(d10){ + d10 = block_data_pos[- 1]; + } + if(type_ < intvRadius) type_ += 1; + pred = d10 + d01 - d00; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim0_offset - current_blockcount_y; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + double pred; + int type_; + for(int e=0; e<3; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + double * block_data_pos = data_pos; + double pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + + index ++; + block_data_pos ++; + } + block_data_pos += dim0_offset - current_blockcount_y; + } + cur_unpred_count = unpredictable_count; + } + } + + type += current_block_elements; + indicator_pos ++; + unpred_data += cur_unpred_count; + } + } + } + else{ + type = result_type; + for(size_t i=0; i<num_x; i++){ + for(size_t j=0; j<num_y; j++){ + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + data_pos = *data + offset_x * dim0_offset + offset_y; + + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = (j < split_index_y) ? 
early_blockcount_y : late_blockcount_y; + + size_t current_block_elements = current_blockcount_x * current_blockcount_y; + if(*indicator_pos){ + // decompress by SZ + + double * block_data_pos = data_pos; + double pred; + size_t index = 0; + int type_; + // d11 is current data + size_t unpredictable_count = 0; + double d00, d01, d10; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + d00 = d01 = d10 = 1; + if(i == 0 && ii == 0){ + d00 = d01 = 0; + } + if(j == 0 && jj == 0){ + d00 = d10 = 0; + } + if(d00){ + d00 = block_data_pos[- dim0_offset - 1]; + } + if(d01){ + d01 = block_data_pos[- dim0_offset]; + } + if(d10){ + d10 = block_data_pos[- 1]; + } + pred = d10 + d01 - d00; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim0_offset - current_blockcount_y; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + double pred; + int type_; + for(int e=0; e<3; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + double * block_data_pos = data_pos; + double pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim0_offset - current_blockcount_y; + } + cur_unpred_count = unpredictable_count; + } + } + + type += current_block_elements; + indicator_pos ++; + unpred_data += cur_unpred_count; + } + } + } + free(coeff_result_type); + + free(indicator); + free(result_type); +} + + +void decompressDataSeries_double_3D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data){ + + size_t dim0_offset = r2 * r3; + size_t dim1_offset = r3; + size_t num_elements = r1 * r2 * r3; + + *data = (double*)malloc(sizeof(double)*num_elements); + + unsigned char * comp_data_pos = comp_data; + + size_t block_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + // calculate block dims + size_t num_x, num_y, num_z; + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size); + + size_t split_index_x, split_index_y, split_index_z; + size_t early_blockcount_x, early_blockcount_y, early_blockcount_z; + size_t late_blockcount_x, late_blockcount_y, late_blockcount_z; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z); + + size_t num_blocks = num_x * num_y * num_z; + + double realPrecision = 
bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + unsigned int intervals = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + updateQuantizationInfo(intervals); + + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+4, nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + double mean; + unsigned char use_mean; + memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); + comp_data_pos += sizeof(unsigned char); + memcpy(&mean, comp_data_pos, sizeof(double)); + comp_data_pos += sizeof(double); + size_t reg_count = 0; + + unsigned char * indicator; + size_t indicator_bitlength = (num_blocks - 1)/8 + 1; + convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); + comp_data_pos += indicator_bitlength; + for(size_t i=0; i<num_blocks; i++){ + if(!indicator[i]) reg_count ++; + } + + int coeff_intvRadius[4]; + int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int)); + int * coeff_type[4]; + double precision[4]; + double * coeff_unpred_data[4]; + if(reg_count > 0){ + for(int i=0; i<4; i++){ + precision[i] = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + int stateNum = 2*coeff_intvRadius[i]*2; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+4, nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + coeff_type[i] = coeff_result_type + i * num_blocks; + size_t typeArray_size = bytesToSize(comp_data_pos); + decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); + comp_data_pos += sizeof(size_t) + typeArray_size; + int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + coeff_unpred_data[i] = (double *) comp_data_pos; + comp_data_pos += coeff_unpred_count * sizeof(double); + SZ_ReleaseHuffman(huffmanTree); + } + } + double last_coefficients[4] = {0.0}; + int coeff_unpred_data_count[4] = {0}; + int coeff_index = 0; + updateQuantizationInfo(intervals); + + size_t total_unpred; + memcpy(&total_unpred, comp_data_pos, sizeof(size_t)); + comp_data_pos += sizeof(size_t); + double * unpred_data = (double *) comp_data_pos; + comp_data_pos += total_unpred * sizeof(double); + + int * result_type = (int *) malloc(num_elements * sizeof(int)); + decode(comp_data_pos, num_elements, root, result_type); + SZ_ReleaseHuffman(huffmanTree); + + int intvRadius = exe_params->intvRadius; + + int * type; + double * data_pos = *data; + size_t offset_x, offset_y, offset_z; + size_t current_blockcount_x, current_blockcount_y, current_blockcount_z; + size_t cur_unpred_count; + unsigned char * indicator_pos = indicator; + if(use_mean){ + // type = result_type; + + // for(size_t i=0; i<num_x; i++){ + // for(size_t j=0; j<num_y; j++){ + // for(size_t k=0; k<num_z; k++){ + // offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + // offset_y = (j < split_index_y) ? 
j * early_blockcount_y : j * late_blockcount_y + split_index_y; + // offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; + // data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset + offset_z; + + // current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + // current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + // current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + + // // type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset + offset_z * current_blockcount_x * current_blockcount_y; + // // type = result_type + type_offset; + // size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + // // index = i * num_y * num_z + j * num_z + k; + + // // printf("i j k: %ld %ld %ld\toffset: %ld %ld %ld\tindicator: %ld\n", i, j, k, offset_x, offset_y, offset_z, indicator[index]); + // if(*indicator_pos){ + // // decompress by SZ + // // cur_unpred_count = decompressDataSeries_double_3D_blocked_nonblock_pred(data_pos, r1, r2, r3, current_blockcount_x, current_blockcount_y, current_blockcount_z, i, j, k, realPrecision, type, unpred_data); + // double * block_data_pos = data_pos; + // double pred; + // size_t index = 0; + // int type_; + // // d111 is current data + // size_t unpredictable_count = 0; + // double d000, d001, d010, d011, d100, d101, d110; + // for(size_t ii=0; ii<current_blockcount_x; ii++){ + // for(size_t jj=0; jj<current_blockcount_y; jj++){ + // for(size_t kk=0; kk<current_blockcount_z; kk++){ + // type_ = type[index]; + // if(type_ == intvRadius){ + // *block_data_pos = mean; + // } + // else if(type_ == 0){ + // *block_data_pos = unpred_data[unpredictable_count ++]; + // } + // else{ + // d000 = d001 = d010 = d011 = d100 = d101 = d110 = 1; + // if(i == 0 && ii == 0){ + // d000 = d001 = d010 = d011 = 0; + // } + // if(j == 0 && jj == 0){ + // d000 = d001 = d100 = d101 = 0; + // } + // if(k == 0 && kk == 0){ + // d000 = d010 = d100 = d110 = 0; + // } + // if(d000){ + // d000 = block_data_pos[- dim0_offset - dim1_offset - 1]; + // } + // if(d001){ + // d001 = block_data_pos[- dim0_offset - dim1_offset]; + // } + // if(d010){ + // d010 = block_data_pos[- dim0_offset - 1]; + // } + // if(d011){ + // d011 = block_data_pos[- dim0_offset]; + // } + // if(d100){ + // d100 = block_data_pos[- dim1_offset - 1]; + // } + // if(d101){ + // d101 = block_data_pos[- dim1_offset]; + // } + // if(d110){ + // d110 = block_data_pos[- 1]; + // } + // if(type_ < intvRadius) type_ += 1; + // pred = d110 + d101 + d011 - d100 - d010 - d001 + d000; + // *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + // } + // index ++; + // block_data_pos ++; + // } + // block_data_pos += dim1_offset - current_blockcount_z; + // } + // block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + // } + // cur_unpred_count = unpredictable_count; + // } + // else{ + // // decompress by regression + // { + // //restore regression coefficients + // double pred; + // int type_; + // for(int e=0; e<4; e++){ + // // if(i == 0 && j == 0 && k == 19){ + // // printf("~\n"); + // // } + // type_ = coeff_type[e][coeff_index]; + // if (type_ != 0){ + // pred = last_coefficients[e]; + // last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + // } + // else{ + // last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + // 
coeff_unpred_data_count[e] ++; + // } + // if(fabs(last_coefficients[e]) > 10000){ + // printf("%d %d %d-%d: pred %.4f type %d precision %.4g last_coefficients %.4g\n", i, j, k, e, pred, type_, precision[e], last_coefficients[e]); + // exit(0); + // } + // } + // coeff_index ++; + // } + // { + // double * block_data_pos = data_pos; + // double pred; + // int type_; + // size_t index = 0; + // size_t unpredictable_count = 0; + // for(size_t ii=0; ii<current_blockcount_x; ii++){ + // for(size_t jj=0; jj<current_blockcount_y; jj++){ + // for(size_t kk=0; kk<current_blockcount_z; kk++){ + // if(block_data_pos - (*data) == 19470788){ + // printf("dec stop\n"); + // } + + // type_ = type[index]; + // if (type_ != 0){ + // pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + // *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + // } + // else{ + // *block_data_pos = unpred_data[unpredictable_count ++]; + // } + // index ++; + // block_data_pos ++; + // } + // block_data_pos += dim1_offset - current_blockcount_z; + // } + // block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + // } + // cur_unpred_count = unpredictable_count; + // } + // } + + // type += current_block_elements; + // indicator_pos ++; + // unpred_data += cur_unpred_count; + // // decomp_unpred += cur_unpred_count; + // // printf("block comp done, data_offset from %ld to %ld: diff %ld\n", *data, data_pos, data_pos - *data); + // // fflush(stdout); + // } + // } + // } + + type = result_type; + // i == 0 + { + // j == 0 + { + // k == 0 + { + data_pos = *data; + + current_blockcount_x = early_blockcount_x; + current_blockcount_y = early_blockcount_y; + current_blockcount_z = early_blockcount_z; + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + double * block_data_pos = data_pos; + double pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + // ii == 0 + { + // jj == 0 + { + { + // kk == 0 + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = 0; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < 
intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + for(size_t ii=1; ii<current_blockcount_x; ii++){ + // jj == 0 + { + { + // kk == 0 + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- dim0_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + double pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + double * block_data_pos = data_pos; + double pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + 
*block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } // end k == 0 + // i == 0 j == 0 k != 0 + for(size_t k=1; k<num_z; k++){ + offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; + data_pos = *data + offset_z; + + current_blockcount_x = early_blockcount_x; + current_blockcount_y = early_blockcount_y; + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + double * block_data_pos = data_pos; + double pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + // ii == 0 + { + // jj == 0 + { + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + for(size_t ii=1; ii<current_blockcount_x; ii++){ + // jj == 0 + { + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += 
dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + double pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + double * block_data_pos = data_pos; + double pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } + }// end j==0 + for(size_t j=1; j<num_y; j++){ + // k == 0 + { + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + data_pos = *data + offset_y * dim1_offset; + + current_blockcount_x = early_blockcount_x; + current_blockcount_y = (j < split_index_y) ? 
early_blockcount_y : late_blockcount_y; + current_blockcount_z = early_blockcount_z; + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + double * block_data_pos = data_pos; + double pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + // ii == 0 + { + for(size_t jj=0; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + for(size_t ii=1; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + double pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + double * block_data_pos = data_pos; + double pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = 
last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } // end k == 0 + for(size_t k=1; k<num_z; k++){ + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; + data_pos = *data + offset_y * dim1_offset + offset_z; + + current_blockcount_x = early_blockcount_x; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + double * block_data_pos = data_pos; + double pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + // ii == 0 + { + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + for(size_t ii=1; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + double pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + double * block_data_pos = data_pos; + double pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; 
ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } + } + } // end i==0 + for(size_t i=1; i<num_x; i++){ + // j == 0 + { + // k == 0 + { + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + data_pos = *data + offset_x * dim0_offset; + + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = early_blockcount_y; + current_blockcount_z = early_blockcount_z; + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + double * block_data_pos = data_pos; + double pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + // jj == 0 + { + { + // kk == 0 + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- dim0_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += 
dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + double pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + double * block_data_pos = data_pos; + double pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } // end k == 0 + for(size_t k=1; k<num_z; k++){ + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; + data_pos = *data + offset_x * dim0_offset + offset_z; + + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = early_blockcount_y; + current_blockcount_z = (k < split_index_z) ? 
early_blockcount_z : late_blockcount_z; + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + double * block_data_pos = data_pos; + double pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + // jj == 0 + { + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + double pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + double * block_data_pos = data_pos; + double pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } + }// end j = 0 + for(size_t j=1; j<num_y; j++){ + // k == 0 + { + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset; + + current_blockcount_x = (i < split_index_x) ? 
early_blockcount_x : late_blockcount_x; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + current_blockcount_z = early_blockcount_z; + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + double * block_data_pos = data_pos; + double pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + double pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + double * block_data_pos = data_pos; + double pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } // end k == 0 + for(size_t k=1; k<num_z; k++){ + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + offset_z = (k < split_index_z) ? 
k * early_blockcount_z : k * late_blockcount_z + split_index_z; + data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset + offset_z; + + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + double * block_data_pos = data_pos; + double pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + double pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + double * block_data_pos = data_pos; + double pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } + } + } + } + else{ + type = result_type; + // i == 0 + { + // j == 0 + { + // k == 0 + { + data_pos = *data; + + current_blockcount_x = early_blockcount_x; + current_blockcount_y = early_blockcount_y; + current_blockcount_z = early_blockcount_z; + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + double * block_data_pos = data_pos; + double pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + // ii == 0 + { + // jj == 0 + { + { + // kk == 0 + type_ = type[index]; + if(type_ == 0){ + *block_data_pos 
= unpred_data[unpredictable_count ++]; + } + else{ + pred = 0; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + for(size_t ii=1; ii<current_blockcount_x; ii++){ + // jj == 0 + { + { + // kk == 0 + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- dim0_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + double pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * 
precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + double * block_data_pos = data_pos; + double pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } // end k == 0 + // i == 0 j == 0 k != 0 + for(size_t k=1; k<num_z; k++){ + offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; + data_pos = *data + offset_z; + + current_blockcount_x = early_blockcount_x; + current_blockcount_y = early_blockcount_y; + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + double * block_data_pos = data_pos; + double pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + // ii == 0 + { + // jj == 0 + { + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + for(size_t ii=1; ii<current_blockcount_x; ii++){ + // jj == 0 + { + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- 
dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + double pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + double * block_data_pos = data_pos; + double pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } + }// end j==0 + for(size_t j=1; j<num_y; j++){ + // k == 0 + { + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + data_pos = *data + offset_y * dim1_offset; + + current_blockcount_x = early_blockcount_x; + current_blockcount_y = (j < split_index_y) ? 
early_blockcount_y : late_blockcount_y; + current_blockcount_z = early_blockcount_z; + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + double * block_data_pos = data_pos; + double pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + // ii == 0 + { + for(size_t jj=0; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + for(size_t ii=1; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + double pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + double * block_data_pos = data_pos; + double pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * 
dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } // end k == 0 + for(size_t k=1; k<num_z; k++){ + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; + data_pos = *data + offset_y * dim1_offset + offset_z; + + current_blockcount_x = early_blockcount_x; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + double * block_data_pos = data_pos; + double pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + // ii == 0 + { + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + for(size_t ii=1; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + double pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + double * block_data_pos = data_pos; + double pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count 
= unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } + } + } // end i==0 + for(size_t i=1; i<num_x; i++){ + // j == 0 + { + // k == 0 + { + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + data_pos = *data + offset_x * dim0_offset; + + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = early_blockcount_y; + current_blockcount_z = early_blockcount_z; + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + double * block_data_pos = data_pos; + double pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + // jj == 0 + { + { + // kk == 0 + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- dim0_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + double pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + double * block_data_pos = data_pos; + double pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + 
last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } // end k == 0 + for(size_t k=1; k<num_z; k++){ + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; + data_pos = *data + offset_x * dim0_offset + offset_z; + + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = early_blockcount_y; + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + double * block_data_pos = data_pos; + double pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + // jj == 0 + { + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + double pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + double * block_data_pos = data_pos; + double pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + 
} + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } + }// end j = 0 + for(size_t j=1; j<num_y; j++){ + // k == 0 + { + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset; + + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + current_blockcount_z = early_blockcount_z; + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + double * block_data_pos = data_pos; + double pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + double pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + double * block_data_pos = data_pos; + double pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } 
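				/*
				 * Editor's note (not part of the upstream patch): both branches above
				 * reconstruct a value from its quantization code as
				 *     value = pred + 2 * (type_ - intvRadius) * realPrecision,
				 * e.g. with realPrecision = 1e-3 and type_ = intvRadius + 2 the
				 * correction added to the prediction is 2 * 2 * 1e-3 = 4e-3.
				 * The "decompress by SZ" branch predicts each point from the
				 * Lorenzo-style stencil of already-decoded neighbors (1D, 2D or 3D
				 * differences depending on the block-border case), while the
				 * "decompress by regression" branch predicts with the per-block plane
				 * last_coefficients[0]*ii + last_coefficients[1]*jj +
				 * last_coefficients[2]*kk + last_coefficients[3]; in both cases
				 * type_ == 0 marks an unpredictable point stored verbatim in
				 * unpred_data. The bookkeeping just below advances the shared type,
				 * indicator and unpredictable-data cursors to the next block.
				 */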
+ indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } // end k == 0 + for(size_t k=1; k<num_z; k++){ + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; + data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset + offset_z; + + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + double * block_data_pos = data_pos; + double pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + double pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + double * block_data_pos = data_pos; + double pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } + } + } + } + + free(coeff_result_type); + + free(indicator); + free(result_type); +} diff --git a/thirdparty/SZ/sz/src/szd_double_pwr.c b/thirdparty/SZ/sz/src/szd_double_pwr.c index f4a6fd8..a3ec18e 100644 --- a/thirdparty/SZ/sz/src/szd_double_pwr.c +++ b/thirdparty/SZ/sz/src/szd_double_pwr.c @@ -13,6 +13,7 @@ #include "TightDataPointStorageD.h" 
#include "sz.h" #include "Huffman.h" +#include "utility.h" //#include "rw.h" #pragma GCC diagnostic push @@ -1347,4 +1348,77 @@ void decompressDataSeries_double_1D_pwrgroup(double** data, size_t dataSeriesLen free(groupErrorBounds); free(groupID); } + +void decompressDataSeries_double_1D_pwr_pre_log(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps) { + + decompressDataSeries_double_1D(data, dataSeriesLength, tdps); + double threshold = tdps->minLogValue; + if(tdps->pwrErrBoundBytes_size > 0){ + unsigned char * signs; + sz_lossless_decompress(confparams_dec->losslessCompressor, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); + + for(size_t i=0; i<dataSeriesLength; i++){ + if((*data)[i] < threshold) (*data)[i] = 0; + else (*data)[i] = exp2((*data)[i]); + if(signs[i]) (*data)[i] = -((*data)[i]); + } + free(signs); + } + else{ + for(size_t i=0; i<dataSeriesLength; i++){ + if((*data)[i] < threshold) (*data)[i] = 0; + else (*data)[i] = exp2((*data)[i]); + } + } + +} + +void decompressDataSeries_double_2D_pwr_pre_log(double** data, size_t r1, size_t r2, TightDataPointStorageD* tdps) { + + size_t dataSeriesLength = r1 * r2; + decompressDataSeries_double_2D(data, r1, r2, tdps); + double threshold = tdps->minLogValue; + if(tdps->pwrErrBoundBytes_size > 0){ + unsigned char * signs; + sz_lossless_decompress(confparams_dec->losslessCompressor, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); + + for(size_t i=0; i<dataSeriesLength; i++){ + if((*data)[i] < threshold) (*data)[i] = 0; + else (*data)[i] = exp2((*data)[i]); + if(signs[i]) (*data)[i] = -((*data)[i]); + } + free(signs); + } + else{ + for(size_t i=0; i<dataSeriesLength; i++){ + if((*data)[i] < threshold) (*data)[i] = 0; + else (*data)[i] = exp2((*data)[i]); + } + } +} + +void decompressDataSeries_double_3D_pwr_pre_log(double** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageD* tdps) { + + size_t dataSeriesLength = r1 * r2 * r3; + decompressDataSeries_double_3D(data, r1, r2, r3, tdps); + double threshold = tdps->minLogValue; + if(tdps->pwrErrBoundBytes_size > 0){ + unsigned char * signs; + sz_lossless_decompress(confparams_dec->losslessCompressor, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); + + for(size_t i=0; i<dataSeriesLength; i++){ + if((*data)[i] < threshold) (*data)[i] = 0; + else (*data)[i] = exp2((*data)[i]); + if(signs[i]) (*data)[i] = -((*data)[i]); + } + free(signs); + } + else{ + for(size_t i=0; i<dataSeriesLength; i++){ + if((*data)[i] < threshold) (*data)[i] = 0; + else (*data)[i] = exp2((*data)[i]); + } + } +} + #pragma GCC diagnostic pop diff --git a/thirdparty/SZ/sz/src/szd_float.c b/thirdparty/SZ/sz/src/szd_float.c index 5a420af..0c7df9e 100644 --- a/thirdparty/SZ/sz/src/szd_float.c +++ b/thirdparty/SZ/sz/src/szd_float.c @@ -1,7 +1,7 @@ /** * @file szd_float.c - * @author Sheng Di and Dingwen Tao - * @date Aug, 2016 + * @author Sheng Di, Dingwen Tao, Xin Liang + * @date Aug, 2018 * @brief * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. * See COPYRIGHT in top-level directory. 
@@ -16,6 +16,7 @@ #include "Huffman.h" #include "szd_float_pwr.h" #include "szd_float_ts.h" +#include "utility.h" /** * @@ -35,10 +36,10 @@ int SZ_decompress_args_float(float** newData, size_t r5, size_t r4, size_t r3, s if(cmpSize!=8+4+MetaDataByteLength && cmpSize!=8+8+MetaDataByteLength) //4,8 means two posibilities of SZ_SIZE_TYPE { - int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]); + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); if(confparams_dec->szMode!=SZ_TEMPORAL_COMPRESSION) { - if(isZlib) + if(confparams_dec->losslessCompressor!=-1) confparams_dec->szMode = SZ_BEST_COMPRESSION; else confparams_dec->szMode = SZ_BEST_SPEED; @@ -53,7 +54,7 @@ int SZ_decompress_args_float(float** newData, size_t r5, size_t r4, size_t r3, s { if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; - tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); //memcpy(szTmpBytes, tmpBytes, tmpSize); //free(tmpBytes); //release useless memory @@ -88,21 +89,40 @@ int SZ_decompress_args_float(float** newData, size_t r5, size_t r4, size_t r3, s (*newData)[i] = bytesToFloat(p); } } - else if (dim == 1) - getSnapshotData_float_1D(newData,r1,tdps, errBoundMode); - else - if (dim == 2) - getSnapshotData_float_2D(newData,r2,r1,tdps, errBoundMode); - else - if (dim == 3) - getSnapshotData_float_3D(newData,r3,r2,r1,tdps, errBoundMode); - else - if (dim == 4) - getSnapshotData_float_4D(newData,r4,r3,r2,r1,tdps, errBoundMode); - else + else { - printf("Error: currently support only at most 4 dimensions!\n"); - status = SZ_DERR; + if(tdps->raBytes_size > 0) //v2.0 + { + if (dim == 1) + getSnapshotData_float_1D(newData,r1,tdps, errBoundMode); + else if(dim == 2) + decompressDataSeries_float_2D_nonblocked_with_blocked_regression(newData, r2, r1, tdps->raBytes); + else if(dim == 3) + decompressDataSeries_float_3D_nonblocked_with_blocked_regression(newData, r3, r2, r1, tdps->raBytes); + else if(dim == 4) + decompressDataSeries_float_3D_nonblocked_with_blocked_regression(newData, r4*r3, r2, r1, tdps->raBytes); + else + { + printf("Error: currently support only at most 4 dimensions!\n"); + status = SZ_DERR; + } + } + else //1.4.13 + { + if (dim == 1) + getSnapshotData_float_1D(newData,r1,tdps, errBoundMode); + else if (dim == 2) + getSnapshotData_float_2D(newData,r2,r1,tdps, errBoundMode); + else if (dim == 3) + getSnapshotData_float_3D(newData,r3,r2,r1,tdps, errBoundMode); + else if (dim == 4) + getSnapshotData_float_4D(newData,r4,r3,r2,r1,tdps, errBoundMode); + else + { + printf("Error: currently support only at most 4 dimensions!\n"); + status = SZ_DERR; + } + } } free_TightDataPointStorageF2(tdps); if(confparams_dec->szMode!=SZ_BEST_SPEED && cmpSize!=8+MetaDataByteLength+exe_params->SZ_SIZE_TYPE) @@ -1651,8 +1671,8 @@ void getSnapshotData_float_1D(float** data, 
size_t dataSeriesLength, TightDataPo } else { - //decompressDataSeries_float_1D_pwr(data, dataSeriesLength, tdps); - decompressDataSeries_float_1D_pwrgroup(data, dataSeriesLength, tdps); + decompressDataSeries_float_1D_pwr_pre_log(data, dataSeriesLength, tdps); + //decompressDataSeries_float_1D_pwrgroup(data, dataSeriesLength, tdps); } return; } else { @@ -1675,7 +1695,8 @@ void getSnapshotData_float_1D(float** data, size_t dataSeriesLength, TightDataPo if(errBoundMode < PW_REL) decompressDataSeries_float_1D(&decmpData, dataSeriesLength, tdps); else - decompressDataSeries_float_1D_pwr(&decmpData, dataSeriesLength, tdps); + //decompressDataSeries_float_1D_pwr(&decmpData, dataSeriesLength, tdps); + decompressDataSeries_float_1D_pwr_pre_log(&decmpData, dataSeriesLength, tdps); // insert the decompressed data size_t k = 0; for (i = 0; i < dataSeriesLength; i++) { @@ -1716,7 +1737,8 @@ void getSnapshotData_float_2D(float** data, size_t r1, size_t r2, TightDataPoint } else { - decompressDataSeries_float_2D_pwr(data, r1, r2, tdps); + //decompressDataSeries_float_2D_pwr(data, r1, r2, tdps); + decompressDataSeries_float_2D_pwr_pre_log(data, r1, r2, tdps); } return; @@ -1740,7 +1762,8 @@ void getSnapshotData_float_2D(float** data, size_t r1, size_t r2, TightDataPoint if(errBoundMode < PW_REL) decompressDataSeries_float_2D(&decmpData, r1, r2, tdps); else - decompressDataSeries_float_2D_pwr(&decmpData, r1, r2, tdps); + //decompressDataSeries_float_2D_pwr(&decmpData, r1, r2, tdps); + decompressDataSeries_float_2D_pwr_pre_log(&decmpData, r1, r2, tdps); // insert the decompressed data size_t k = 0; for (i = 0; i < dataSeriesLength; i++) { @@ -1773,7 +1796,7 @@ void getSnapshotData_float_3D(float** data, size_t r1, size_t r2, size_t r3, Tig if(multisteps->compressionType == 0) decompressDataSeries_float_3D(data, r1, r2, r3, tdps); else - decompressDataSeries_float_1D_ts(data, r1*r2*r3, multisteps, tdps); + decompressDataSeries_float_1D_ts(data, dataSeriesLength, multisteps, tdps); } else #endif @@ -1781,7 +1804,8 @@ void getSnapshotData_float_3D(float** data, size_t r1, size_t r2, size_t r3, Tig } else { - decompressDataSeries_float_3D_pwr(data, r1, r2, r3, tdps); + //decompressDataSeries_float_3D_pwr(data, r1, r2, r3, tdps); + decompressDataSeries_float_3D_pwr_pre_log(data, r1, r2, r3, tdps); } return; @@ -1805,7 +1829,8 @@ void getSnapshotData_float_3D(float** data, size_t r1, size_t r2, size_t r3, Tig if(errBoundMode < PW_REL) decompressDataSeries_float_3D(&decmpData, r1, r2, r3, tdps); else - decompressDataSeries_float_3D_pwr(&decmpData, r1, r2, r3, tdps); + //decompressDataSeries_float_3D_pwr(&decmpData, r1, r2, r3, tdps); + decompressDataSeries_float_3D_pwr_pre_log(&decmpData, r1, r2, r3, tdps); // insert the decompressed data size_t k = 0; for (i = 0; i < dataSeriesLength; i++) { @@ -1846,7 +1871,8 @@ void getSnapshotData_float_4D(float** data, size_t r1, size_t r2, size_t r3, siz } else { - decompressDataSeries_float_3D_pwr(data, r1*r2, r3, r4, tdps); + //decompressDataSeries_float_3D_pwr(data, r1*r2, r3, r4, tdps); + decompressDataSeries_float_3D_pwr_pre_log(data, r1*r2, r3, r4, tdps); //ToDO //decompressDataSeries_float_4D_pwr(data, r1, r2, r3, r4, tdps); } @@ -1868,7 +1894,8 @@ void getSnapshotData_float_4D(float** data, size_t r1, size_t r2, size_t r3, siz if(errBoundMode < PW_REL) decompressDataSeries_float_4D(&decmpData, r1, r2, r3, r4, tdps); else - decompressDataSeries_float_3D_pwr(&decmpData, r1*r2, r3, r4, tdps); + //decompressDataSeries_float_3D_pwr(&decmpData, r1*r2, r3, r4, tdps); + 
decompressDataSeries_float_3D_pwr_pre_log(&decmpData, r1*r2, r3, r4, tdps); //ToDO //decompressDataSeries_float_4D_pwr(&decompData, r1, r2, r3, r4, tdps); // insert the decompressed data @@ -2177,3 +2204,3059 @@ size_t decompressDataSeries_float_2D_RA_block(float * data, float mean, size_t d return unpredictable_count; } +void decompressDataSeries_float_2D_nonblocked_with_blocked_regression(float** data, size_t r1, size_t r2, unsigned char* comp_data){ + + size_t dim0_offset = r2; + size_t num_elements = r1 * r2; + + *data = (float*)malloc(sizeof(float)*num_elements); + + unsigned char * comp_data_pos = comp_data; + + size_t block_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + // calculate block dims + size_t num_x, num_y; + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); + + size_t split_index_x, split_index_y; + size_t early_blockcount_x, early_blockcount_y; + size_t late_blockcount_x, late_blockcount_y; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + + size_t num_blocks = num_x * num_y; + + double realPrecision = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + unsigned int intervals = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + updateQuantizationInfo(intervals); + + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + float mean; + unsigned char use_mean; + memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); + comp_data_pos += sizeof(unsigned char); + memcpy(&mean, comp_data_pos, sizeof(float)); + comp_data_pos += sizeof(float); + size_t reg_count = 0; + + unsigned char * indicator; + size_t indicator_bitlength = (num_blocks - 1)/8 + 1; + convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); + comp_data_pos += indicator_bitlength; + for(size_t i=0; i<num_blocks; i++){ + if(!indicator[i]) reg_count ++; + } + //printf("reg_count: %ld\n", reg_count); + + int coeff_intvRadius[3]; + int * coeff_result_type = (int *) malloc(num_blocks*3*sizeof(int)); + int * coeff_type[3]; + double precision[3]; + float * coeff_unpred_data[3]; + if(reg_count > 0){ + for(int i=0; i<3; i++){ + precision[i] = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + int stateNum = 2*coeff_intvRadius[i]*2; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + coeff_type[i] = coeff_result_type + i * num_blocks; + size_t typeArray_size = bytesToSize(comp_data_pos); + decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); + comp_data_pos += sizeof(size_t) + typeArray_size; + int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos); + 
comp_data_pos += sizeof(int); + coeff_unpred_data[i] = (float *) comp_data_pos; + comp_data_pos += coeff_unpred_count * sizeof(float); + SZ_ReleaseHuffman(huffmanTree); + } + } + float last_coefficients[3] = {0.0}; + int coeff_unpred_data_count[3] = {0}; + int coeff_index = 0; + updateQuantizationInfo(intervals); + + size_t total_unpred; + memcpy(&total_unpred, comp_data_pos, sizeof(size_t)); + comp_data_pos += sizeof(size_t); + float * unpred_data = (float *) comp_data_pos; + comp_data_pos += total_unpred * sizeof(float); + + int * result_type = (int *) malloc(num_elements * sizeof(int)); + decode(comp_data_pos, num_elements, root, result_type); + SZ_ReleaseHuffman(huffmanTree); + + int intvRadius = exe_params->intvRadius; + + int * type; + + float * data_pos = *data; + size_t offset_x, offset_y; + size_t current_blockcount_x, current_blockcount_y; + size_t cur_unpred_count; + + unsigned char * indicator_pos = indicator; + if(use_mean){ + type = result_type; + for(size_t i=0; i<num_x; i++){ + for(size_t j=0; j<num_y; j++){ + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + data_pos = *data + offset_x * dim0_offset + offset_y; + + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + + size_t current_block_elements = current_blockcount_x * current_blockcount_y; + if(*indicator_pos){ + // decompress by SZ + + float * block_data_pos = data_pos; + float pred; + size_t index = 0; + int type_; + // d11 is current data + size_t unpredictable_count = 0; + float d00, d01, d10; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + d00 = d01 = d10 = 1; + if(i == 0 && ii == 0){ + d00 = d01 = 0; + } + if(j == 0 && jj == 0){ + d00 = d10 = 0; + } + if(d00){ + d00 = block_data_pos[- dim0_offset - 1]; + } + if(d01){ + d01 = block_data_pos[- dim0_offset]; + } + if(d10){ + d10 = block_data_pos[- 1]; + } + if(type_ < intvRadius) type_ += 1; + pred = d10 + d01 - d00; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim0_offset - current_blockcount_y; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<3; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float * block_data_pos = data_pos; + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + + index ++; + 
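+						/* Reconstruction rule shared by both block types: a nonzero
+						   quantization code yields pred + 2*(type_ - intvRadius)*realPrecision,
+						   while type_ == 0 means the value was stored verbatim in unpred_data
+						   (and, in the use_mean variant, type_ == intvRadius maps straight to
+						   the stored mean). In SZ-predicted blocks pred is the 2D Lorenzo
+						   stencil d10 + d01 - d00; in regression blocks it is the fitted plane
+						   last_coefficients[0]*ii + last_coefficients[1]*jj + last_coefficients[2]. */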
block_data_pos ++; + } + block_data_pos += dim0_offset - current_blockcount_y; + } + cur_unpred_count = unpredictable_count; + } + } + + type += current_block_elements; + indicator_pos ++; + unpred_data += cur_unpred_count; + } + } + } + else{ + type = result_type; + for(size_t i=0; i<num_x; i++){ + for(size_t j=0; j<num_y; j++){ + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + data_pos = *data + offset_x * dim0_offset + offset_y; + + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + + size_t current_block_elements = current_blockcount_x * current_blockcount_y; + if(*indicator_pos){ + // decompress by SZ + + float * block_data_pos = data_pos; + float pred; + size_t index = 0; + int type_; + // d11 is current data + size_t unpredictable_count = 0; + float d00, d01, d10; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + d00 = d01 = d10 = 1; + if(i == 0 && ii == 0){ + d00 = d01 = 0; + } + if(j == 0 && jj == 0){ + d00 = d10 = 0; + } + if(d00){ + d00 = block_data_pos[- dim0_offset - 1]; + } + if(d01){ + d01 = block_data_pos[- dim0_offset]; + } + if(d10){ + d10 = block_data_pos[- 1]; + } + pred = d10 + d01 - d00; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim0_offset - current_blockcount_y; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<3; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float * block_data_pos = data_pos; + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim0_offset - current_blockcount_y; + } + cur_unpred_count = unpredictable_count; + } + } + + type += current_block_elements; + indicator_pos ++; + unpred_data += cur_unpred_count; + } + } + } + free(coeff_result_type); + + free(indicator); + free(result_type); +} + + +void decompressDataSeries_float_3D_nonblocked_with_blocked_regression(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data){ + + size_t dim0_offset = r2 * r3; + size_t dim1_offset = r3; + size_t num_elements = r1 * r2 * r3; + + *data = (float*)malloc(sizeof(float)*num_elements); + + unsigned char * comp_data_pos = comp_data; + + size_t block_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + // calculate block dims + size_t num_x, num_y, num_z; + 
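+	/* Rebuild the block grid used at compression time: the number of blocks per
+	   dimension is derived from the stored block_size, and SZ_COMPUTE_BLOCKCOUNT
+	   below splits each dimension into "early" and "late" block sizes so that
+	   dimensions that are not an exact multiple of block_size are still covered. */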
SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); + SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size); + + size_t split_index_x, split_index_y, split_index_z; + size_t early_blockcount_x, early_blockcount_y, early_blockcount_z; + size_t late_blockcount_x, late_blockcount_y, late_blockcount_z; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z); + + size_t num_blocks = num_x * num_y * num_z; + + double realPrecision = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + unsigned int intervals = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + updateQuantizationInfo(intervals); + + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + float mean; + unsigned char use_mean; + memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); + comp_data_pos += sizeof(unsigned char); + memcpy(&mean, comp_data_pos, sizeof(float)); + comp_data_pos += sizeof(float); + size_t reg_count = 0; + + unsigned char * indicator; + size_t indicator_bitlength = (num_blocks - 1)/8 + 1; + convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); + comp_data_pos += indicator_bitlength; + for(size_t i=0; i<num_blocks; i++){ + if(!indicator[i]) reg_count ++; + } + + int coeff_intvRadius[4]; + int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int)); + int * coeff_type[4]; + double precision[4]; + float * coeff_unpred_data[4]; + if(reg_count > 0){ + for(int i=0; i<4; i++){ + precision[i] = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + int stateNum = 2*coeff_intvRadius[i]*2; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + coeff_type[i] = coeff_result_type + i * num_blocks; + size_t typeArray_size = bytesToSize(comp_data_pos); + decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); + comp_data_pos += sizeof(size_t) + typeArray_size; + int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + coeff_unpred_data[i] = (float *) comp_data_pos; + comp_data_pos += coeff_unpred_count * sizeof(float); + SZ_ReleaseHuffman(huffmanTree); + } + } + float last_coefficients[4] = {0.0}; + int coeff_unpred_data_count[4] = {0}; + int coeff_index = 0; + updateQuantizationInfo(intervals); + + size_t total_unpred; + memcpy(&total_unpred, comp_data_pos, sizeof(size_t)); + comp_data_pos += sizeof(size_t); + float * unpred_data = (float *) comp_data_pos; + comp_data_pos += total_unpred * sizeof(float); + + int * result_type = (int *) malloc(num_elements * sizeof(int)); + 
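+	/* Huffman-decode one quantization code per element into result_type; the block
+	   loops below then consume this array through the moving 'type' cursor in the
+	   same block order the compressor emitted it, alongside the shared unpred_data
+	   stream of unpredictable values. */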
decode(comp_data_pos, num_elements, root, result_type); + SZ_ReleaseHuffman(huffmanTree); + + int intvRadius = exe_params->intvRadius; + + int * type; + float * data_pos = *data; + size_t offset_x, offset_y, offset_z; + size_t current_blockcount_x, current_blockcount_y, current_blockcount_z; + size_t cur_unpred_count; + unsigned char * indicator_pos = indicator; + if(use_mean){ + // type = result_type; + + // for(size_t i=0; i<num_x; i++){ + // for(size_t j=0; j<num_y; j++){ + // for(size_t k=0; k<num_z; k++){ + // offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + // offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + // offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; + // data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset + offset_z; + + // current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + // current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + // current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + + // // type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset + offset_z * current_blockcount_x * current_blockcount_y; + // // type = result_type + type_offset; + // size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + // // index = i * num_y * num_z + j * num_z + k; + + // // printf("i j k: %ld %ld %ld\toffset: %ld %ld %ld\tindicator: %ld\n", i, j, k, offset_x, offset_y, offset_z, indicator[index]); + // if(*indicator_pos){ + // // decompress by SZ + // // cur_unpred_count = decompressDataSeries_float_3D_blocked_nonblock_pred(data_pos, r1, r2, r3, current_blockcount_x, current_blockcount_y, current_blockcount_z, i, j, k, realPrecision, type, unpred_data); + // float * block_data_pos = data_pos; + // float pred; + // size_t index = 0; + // int type_; + // // d111 is current data + // size_t unpredictable_count = 0; + // float d000, d001, d010, d011, d100, d101, d110; + // for(size_t ii=0; ii<current_blockcount_x; ii++){ + // for(size_t jj=0; jj<current_blockcount_y; jj++){ + // for(size_t kk=0; kk<current_blockcount_z; kk++){ + // type_ = type[index]; + // if(type_ == intvRadius){ + // *block_data_pos = mean; + // } + // else if(type_ == 0){ + // *block_data_pos = unpred_data[unpredictable_count ++]; + // } + // else{ + // d000 = d001 = d010 = d011 = d100 = d101 = d110 = 1; + // if(i == 0 && ii == 0){ + // d000 = d001 = d010 = d011 = 0; + // } + // if(j == 0 && jj == 0){ + // d000 = d001 = d100 = d101 = 0; + // } + // if(k == 0 && kk == 0){ + // d000 = d010 = d100 = d110 = 0; + // } + // if(d000){ + // d000 = block_data_pos[- dim0_offset - dim1_offset - 1]; + // } + // if(d001){ + // d001 = block_data_pos[- dim0_offset - dim1_offset]; + // } + // if(d010){ + // d010 = block_data_pos[- dim0_offset - 1]; + // } + // if(d011){ + // d011 = block_data_pos[- dim0_offset]; + // } + // if(d100){ + // d100 = block_data_pos[- dim1_offset - 1]; + // } + // if(d101){ + // d101 = block_data_pos[- dim1_offset]; + // } + // if(d110){ + // d110 = block_data_pos[- 1]; + // } + // if(type_ < intvRadius) type_ += 1; + // pred = d110 + d101 + d011 - d100 - d010 - d001 + d000; + // *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + // } + // index ++; + // block_data_pos ++; + // } + // block_data_pos += dim1_offset - current_blockcount_z; + // } + // 
block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + // } + // cur_unpred_count = unpredictable_count; + // } + // else{ + // // decompress by regression + // { + // //restore regression coefficients + // float pred; + // int type_; + // for(int e=0; e<4; e++){ + // // if(i == 0 && j == 0 && k == 19){ + // // printf("~\n"); + // // } + // type_ = coeff_type[e][coeff_index]; + // if (type_ != 0){ + // pred = last_coefficients[e]; + // last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + // } + // else{ + // last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + // coeff_unpred_data_count[e] ++; + // } + // if(fabs(last_coefficients[e]) > 10000){ + // printf("%d %d %d-%d: pred %.4f type %d precision %.4g last_coefficients %.4g\n", i, j, k, e, pred, type_, precision[e], last_coefficients[e]); + // exit(0); + // } + // } + // coeff_index ++; + // } + // { + // float * block_data_pos = data_pos; + // float pred; + // int type_; + // size_t index = 0; + // size_t unpredictable_count = 0; + // for(size_t ii=0; ii<current_blockcount_x; ii++){ + // for(size_t jj=0; jj<current_blockcount_y; jj++){ + // for(size_t kk=0; kk<current_blockcount_z; kk++){ + // if(block_data_pos - (*data) == 19470788){ + // printf("dec stop\n"); + // } + + // type_ = type[index]; + // if (type_ != 0){ + // pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + // *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + // } + // else{ + // *block_data_pos = unpred_data[unpredictable_count ++]; + // } + // index ++; + // block_data_pos ++; + // } + // block_data_pos += dim1_offset - current_blockcount_z; + // } + // block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + // } + // cur_unpred_count = unpredictable_count; + // } + // } + + // type += current_block_elements; + // indicator_pos ++; + // unpred_data += cur_unpred_count; + // // decomp_unpred += cur_unpred_count; + // // printf("block comp done, data_offset from %ld to %ld: diff %ld\n", *data, data_pos, data_pos - *data); + // // fflush(stdout); + // } + // } + // } + + type = result_type; + // i == 0 + { + // j == 0 + { + // k == 0 + { + data_pos = *data; + + current_blockcount_x = early_blockcount_x; + current_blockcount_y = early_blockcount_y; + current_blockcount_z = early_blockcount_z; + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + float * block_data_pos = data_pos; + float pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + // ii == 0 + { + // jj == 0 + { + { + // kk == 0 + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = 0; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; 
jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + for(size_t ii=1; ii<current_blockcount_x; ii++){ + // jj == 0 + { + { + // kk == 0 + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- dim0_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * 
(type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float * block_data_pos = data_pos; + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } // end k == 0 + // i == 0 j == 0 k != 0 + for(size_t k=1; k<num_z; k++){ + offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; + data_pos = *data + offset_z; + + current_blockcount_x = early_blockcount_x; + current_blockcount_y = early_blockcount_y; + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + float * block_data_pos = data_pos; + float pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + // ii == 0 + { + // jj == 0 + { + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + for(size_t ii=1; ii<current_blockcount_x; ii++){ + // jj == 0 + { + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + for(size_t kk=0; 
kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float * block_data_pos = data_pos; + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } + }// end j==0 + for(size_t j=1; j<num_y; j++){ + // k == 0 + { + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + data_pos = *data + offset_y * dim1_offset; + + current_blockcount_x = early_blockcount_x; + current_blockcount_y = (j < split_index_y) ? 
early_blockcount_y : late_blockcount_y; + current_blockcount_z = early_blockcount_z; + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + float * block_data_pos = data_pos; + float pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + // ii == 0 + { + for(size_t jj=0; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + for(size_t ii=1; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float * block_data_pos = data_pos; + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = 
last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } // end k == 0 + for(size_t k=1; k<num_z; k++){ + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; + data_pos = *data + offset_y * dim1_offset + offset_z; + + current_blockcount_x = early_blockcount_x; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + float * block_data_pos = data_pos; + float pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + // ii == 0 + { + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + for(size_t ii=1; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float * block_data_pos = data_pos; + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; 
ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } + } + } // end i==0 + for(size_t i=1; i<num_x; i++){ + // j == 0 + { + // k == 0 + { + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + data_pos = *data + offset_x * dim0_offset; + + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = early_blockcount_y; + current_blockcount_z = early_blockcount_z; + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + float * block_data_pos = data_pos; + float pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + // jj == 0 + { + { + // kk == 0 + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- dim0_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - 
current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float * block_data_pos = data_pos; + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } // end k == 0 + for(size_t k=1; k<num_z; k++){ + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; + data_pos = *data + offset_x * dim0_offset + offset_z; + + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = early_blockcount_y; + current_blockcount_z = (k < split_index_z) ? 
early_blockcount_z : late_blockcount_z; + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + float * block_data_pos = data_pos; + float pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + // jj == 0 + { + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float * block_data_pos = data_pos; + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } + }// end j = 0 + for(size_t j=1; j<num_y; j++){ + // k == 0 + { + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset; + + current_blockcount_x = (i < split_index_x) ? 
early_blockcount_x : late_blockcount_x; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + current_blockcount_z = early_blockcount_z; + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + float * block_data_pos = data_pos; + float pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float * block_data_pos = data_pos; + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } // end k == 0 + for(size_t k=1; k<num_z; k++){ + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + offset_z = (k < split_index_z) ? 
k * early_blockcount_z : k * late_blockcount_z + split_index_z; + data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset + offset_z; + + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + float * block_data_pos = data_pos; + float pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == intvRadius){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + if(type_ < intvRadius) type_ += 1; + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float * block_data_pos = data_pos; + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } + } + } + } + else{ + type = result_type; + // i == 0 + { + // j == 0 + { + // k == 0 + { + data_pos = *data; + + current_blockcount_x = early_blockcount_x; + current_blockcount_y = early_blockcount_y; + current_blockcount_z = early_blockcount_z; + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + float * block_data_pos = data_pos; + float pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + // ii == 0 + { + // jj == 0 + { + { + // kk == 0 + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = 
unpred_data[unpredictable_count ++]; + } + else{ + pred = 0; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + for(size_t ii=1; ii<current_blockcount_x; ii++){ + // jj == 0 + { + { + // kk == 0 + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- dim0_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * 
precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float * block_data_pos = data_pos; + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } // end k == 0 + // i == 0 j == 0 k != 0 + for(size_t k=1; k<num_z; k++){ + offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; + data_pos = *data + offset_z; + + current_blockcount_x = early_blockcount_x; + current_blockcount_y = early_blockcount_y; + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + float * block_data_pos = data_pos; + float pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + // ii == 0 + { + // jj == 0 + { + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + for(size_t ii=1; ii<current_blockcount_x; ii++){ + // jj == 0 + { + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset 
- dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float * block_data_pos = data_pos; + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } + }// end j==0 + for(size_t j=1; j<num_y; j++){ + // k == 0 + { + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + data_pos = *data + offset_y * dim1_offset; + + current_blockcount_x = early_blockcount_x; + current_blockcount_y = (j < split_index_y) ? 
early_blockcount_y : late_blockcount_y; + current_blockcount_z = early_blockcount_z; + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + float * block_data_pos = data_pos; + float pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + // ii == 0 + { + for(size_t jj=0; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + for(size_t ii=1; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float * block_data_pos = data_pos; + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * 
dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } // end k == 0 + for(size_t k=1; k<num_z; k++){ + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; + data_pos = *data + offset_y * dim1_offset + offset_z; + + current_blockcount_x = early_blockcount_x; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + float * block_data_pos = data_pos; + float pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + // ii == 0 + { + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + for(size_t ii=1; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float * block_data_pos = data_pos; + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = 
unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } + } + } // end i==0 + for(size_t i=1; i<num_x; i++){ + // j == 0 + { + // k == 0 + { + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + data_pos = *data + offset_x * dim0_offset; + + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = early_blockcount_y; + current_blockcount_z = early_blockcount_z; + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + float * block_data_pos = data_pos; + float pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + // jj == 0 + { + { + // kk == 0 + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- dim0_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float * block_data_pos = data_pos; + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + 
last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } // end k == 0 + for(size_t k=1; k<num_z; k++){ + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; + data_pos = *data + offset_x * dim0_offset + offset_z; + + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = early_blockcount_y; + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + float * block_data_pos = data_pos; + float pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + // jj == 0 + { + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + for(size_t jj=1; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float * block_data_pos = data_pos; + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + 
index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } + }// end j = 0 + for(size_t j=1; j<num_y; j++){ + // k == 0 + { + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset; + + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + current_blockcount_z = early_blockcount_z; + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + float * block_data_pos = data_pos; + float pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + { + // kk == 0 + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + for(size_t kk=1; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float * block_data_pos = data_pos; + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + 
indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } // end k == 0 + for(size_t k=1; k<num_z; k++){ + offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; + offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; + offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; + data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset + offset_z; + + current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; + current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; + current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; + + size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; + if(*indicator_pos){ + // decompress by SZ + float * block_data_pos = data_pos; + float pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<4; e++){ + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float * block_data_pos = data_pos; + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<current_blockcount_x; ii++){ + for(size_t jj=0; jj<current_blockcount_y; jj++){ + for(size_t kk=0; kk<current_blockcount_z; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + index ++; + block_data_pos ++; + } + block_data_pos += dim1_offset - current_blockcount_z; + } + block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + type += current_block_elements; + unpred_data += cur_unpred_count; + } + } + } + } + +#ifdef HAVE_TIMECMPR + if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) + memcpy(multisteps->hist_data, (*data), num_elements*sizeof(float)); +#endif + + free(coeff_result_type); + + free(indicator); + free(result_type); +} + +void decompressDataSeries_float_3D_random_access_with_blocked_regression(float** data, size_t r1, size_t r2, 
size_t r3, unsigned char* comp_data){ + + size_t dim0_offset = r2 * r3; + size_t dim1_offset = r3; + size_t num_elements = r1 * r2 * r3; + + *data = (float*)malloc(sizeof(float)*num_elements); + + unsigned char * comp_data_pos = comp_data; + + size_t block_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + // calculate block dims + size_t num_x, num_y, num_z; + num_x = (r1 - 1) / block_size + 1; + num_y = (r2 - 1) / block_size + 1; + num_z = (r3 - 1) / block_size + 1; + + size_t max_num_block_elements = block_size * block_size * block_size; + size_t num_blocks = num_x * num_y * num_z; + + double realPrecision = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + unsigned int intervals = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + updateQuantizationInfo(intervals); + + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + + int stateNum = 2*intervals; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + float mean; + unsigned char use_mean; + memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); + comp_data_pos += sizeof(unsigned char); + memcpy(&mean, comp_data_pos, sizeof(float)); + comp_data_pos += sizeof(float); + size_t reg_count = 0; + + unsigned char * indicator; + size_t indicator_bitlength = (num_blocks - 1)/8 + 1; + convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); + comp_data_pos += indicator_bitlength; + for(size_t i=0; i<num_blocks; i++){ + if(!indicator[i]) reg_count ++; + } + + int coeff_intvRadius[4]; + int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int)); + int * coeff_type[4]; + double precision[4]; + float * coeff_unpred_data[4]; + if(reg_count > 0){ + for(int i=0; i<4; i++){ + precision[i] = bytesToDouble(comp_data_pos); + comp_data_pos += sizeof(double); + coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + int stateNum = 2*coeff_intvRadius[i]*2; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + int nodeCount = bytesToInt_bigEndian(comp_data_pos); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount); + comp_data_pos += sizeof(int) + tree_size; + + coeff_type[i] = coeff_result_type + i * num_blocks; + size_t typeArray_size = bytesToSize(comp_data_pos); + decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); + comp_data_pos += sizeof(size_t) + typeArray_size; + int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(int); + coeff_unpred_data[i] = (float *) comp_data_pos; + comp_data_pos += coeff_unpred_count * sizeof(float); + SZ_ReleaseHuffman(huffmanTree); + } + } + float last_coefficients[4] = {0.0}; + int coeff_unpred_data_count[4] = {0}; + int coeff_index = 0; + updateQuantizationInfo(intervals); + + size_t total_unpred; + memcpy(&total_unpred, comp_data_pos, sizeof(size_t)); + comp_data_pos += sizeof(size_t); + float * unpred_data = (float *) comp_data_pos; + comp_data_pos += total_unpred * sizeof(float); + + int * result_type = (int *) malloc(num_blocks*max_num_block_elements * sizeof(int)); + decode(comp_data_pos, 
num_blocks*max_num_block_elements, root, result_type); + SZ_ReleaseHuffman(huffmanTree); + + int intvRadius = exe_params->intvRadius; + + int * type; + float * data_pos = *data; + size_t cur_unpred_count; + unsigned char * indicator_pos = indicator; + int dec_buffer_size = block_size + 1; + float * dec_buffer = (float *) malloc(dec_buffer_size*dec_buffer_size*dec_buffer_size*sizeof(float)); + memset(dec_buffer, 0, dec_buffer_size*dec_buffer_size*dec_buffer_size*sizeof(float)); + float * block_data_pos_x = NULL; + float * block_data_pos_y = NULL; + float * block_data_pos_z = NULL; + int block_dim0_offset = dec_buffer_size*dec_buffer_size; + int block_dim1_offset = dec_buffer_size; + if(use_mean){ + type = result_type; + for(size_t i=0; i<num_x; i++){ + for(size_t j=0; j<num_y; j++){ + for(size_t k=0; k<num_z; k++){ + data_pos = dec_buffer + dec_buffer_size*dec_buffer_size + dec_buffer_size + 1; + if(*indicator_pos){ + // decompress by SZ + // cur_unpred_count = decompressDataSeries_float_3D_blocked_nonblock_pred(data_pos, r1, r2, r3, current_blockcount_x, current_blockcount_y, current_blockcount_z, i, j, k, realPrecision, type, unpred_data); + float * block_data_pos; + float pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<block_size; ii++){ + for(size_t jj=0; jj<block_size; jj++){ + for(size_t kk=0; kk<block_size; kk++){ + block_data_pos = data_pos + ii*block_dim0_offset + jj*block_dim1_offset + kk; + type_ = type[index]; + if(type_ == 1){ + *block_data_pos = mean; + } + else if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[-1] + block_data_pos[-block_dim1_offset]+ block_data_pos[-block_dim0_offset] - block_data_pos[-block_dim1_offset - 1] + - block_data_pos[-block_dim0_offset - 1] - block_data_pos[-block_dim0_offset - block_dim1_offset] + block_data_pos[-block_dim0_offset - block_dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + } + } + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<4; e++){ + // if(i == 0 && j == 0 && k == 19){ + // printf("~\n"); + // } + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<block_size; ii++){ + for(size_t jj=0; jj<block_size; jj++){ + for(size_t kk=0; kk<block_size; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + data_pos[ii*block_dim0_offset + jj*block_dim1_offset + kk] = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + data_pos[ii*block_dim0_offset + jj*block_dim1_offset + kk] = unpred_data[unpredictable_count ++]; + } + index ++; + } + } + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + unpred_data += cur_unpred_count; + // decomp_unpred += cur_unpred_count; + // printf("block comp done, data_offset from %ld to %ld: diff %ld\n", *data, data_pos, data_pos - *data); + // fflush(stdout); + type += block_size * block_size * block_size; + + // mv data back 
+ block_data_pos_x = *data + i*block_size * dim0_offset + j*block_size * dim1_offset + k*block_size; + for(int ii=0; ii<block_size; ii++){ + if(i*block_size + ii >= r1) break; + block_data_pos_y = block_data_pos_x; + for(int jj=0; jj<block_size; jj++){ + if(j*block_size + jj >= r2) break; + block_data_pos_z = block_data_pos_y; + for(int kk=0; kk<block_size; kk++){ + if(k*block_size + kk >= r3) break; + *block_data_pos_z = data_pos[ii*dec_buffer_size*dec_buffer_size + jj*dec_buffer_size + kk]; + block_data_pos_z ++; + } + block_data_pos_y += dim1_offset; + } + block_data_pos_x += dim0_offset; + } + + } + } + } + + } + else{ + type = result_type; + for(size_t i=0; i<num_x; i++){ + for(size_t j=0; j<num_y; j++){ + for(size_t k=0; k<num_z; k++){ + data_pos = dec_buffer + dec_buffer_size*dec_buffer_size + dec_buffer_size + 1; + if(*indicator_pos){ + // decompress by SZ + // cur_unpred_count = decompressDataSeries_float_3D_blocked_nonblock_pred(data_pos, r1, r2, r3, current_blockcount_x, current_blockcount_y, current_blockcount_z, i, j, k, realPrecision, type, unpred_data); + float * block_data_pos; + float pred; + size_t index = 0; + int type_; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<block_size; ii++){ + for(size_t jj=0; jj<block_size; jj++){ + for(size_t kk=0; kk<block_size; kk++){ + block_data_pos = data_pos + ii*block_dim0_offset + jj*block_dim1_offset + kk; + type_ = type[index]; + if(type_ == 0){ + *block_data_pos = unpred_data[unpredictable_count ++]; + } + else{ + pred = block_data_pos[-1] + block_data_pos[-block_dim1_offset]+ block_data_pos[-block_dim0_offset] - block_data_pos[-block_dim1_offset - 1] + - block_data_pos[-block_dim0_offset - 1] - block_data_pos[-block_dim0_offset - block_dim1_offset] + block_data_pos[-block_dim0_offset - block_dim1_offset - 1]; + *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; + } + index ++; + } + } + } + cur_unpred_count = unpredictable_count; + } + else{ + // decompress by regression + { + //restore regression coefficients + float pred; + int type_; + for(int e=0; e<4; e++){ + // if(i == 0 && j == 0 && k == 19){ + // printf("~\n"); + // } + type_ = coeff_type[e][coeff_index]; + if (type_ != 0){ + pred = last_coefficients[e]; + last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; + } + else{ + last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; + coeff_unpred_data_count[e] ++; + } + } + coeff_index ++; + } + { + float pred; + int type_; + size_t index = 0; + size_t unpredictable_count = 0; + for(size_t ii=0; ii<block_size; ii++){ + for(size_t jj=0; jj<block_size; jj++){ + for(size_t kk=0; kk<block_size; kk++){ + type_ = type[index]; + if (type_ != 0){ + pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; + data_pos[ii*block_dim0_offset + jj*block_dim1_offset + kk] = pred + 2 * (type_ - intvRadius) * realPrecision; + } + else{ + data_pos[ii*block_dim0_offset + jj*block_dim1_offset + kk] = unpred_data[unpredictable_count ++]; + } + index ++; + } + } + } + cur_unpred_count = unpredictable_count; + } + } + indicator_pos ++; + unpred_data += cur_unpred_count; + // decomp_unpred += cur_unpred_count; + // printf("block comp done, data_offset from %ld to %ld: diff %ld\n", *data, data_pos, data_pos - *data); + // fflush(stdout); + type += block_size * block_size * block_size; + // mv data back + block_data_pos_x = *data + i*block_size * dim0_offset + j*block_size * dim1_offset + k*block_size; + for(int ii=0; 
ii<block_size; ii++){ + if(i*block_size + ii >= r1) break; + block_data_pos_y = block_data_pos_x; + for(int jj=0; jj<block_size; jj++){ + if(j*block_size + jj >= r2) break; + block_data_pos_z = block_data_pos_y; + for(int kk=0; kk<block_size; kk++){ + if(k*block_size + kk >= r3) break; + *block_data_pos_z = data_pos[ii*dec_buffer_size*dec_buffer_size + jj*dec_buffer_size + kk]; + block_data_pos_z ++; + } + block_data_pos_y += dim1_offset; + } + block_data_pos_x += dim0_offset; + } + } + } + } + } + free(dec_buffer); + free(coeff_result_type); + + free(indicator); + free(result_type); +} diff --git a/thirdparty/SZ/sz/src/szd_float_pwr.c b/thirdparty/SZ/sz/src/szd_float_pwr.c index 4ab1834..b761d4b 100644 --- a/thirdparty/SZ/sz/src/szd_float_pwr.c +++ b/thirdparty/SZ/sz/src/szd_float_pwr.c @@ -16,6 +16,7 @@ #include "sz.h" #include "Huffman.h" #include "sz_float_pwr.h" +#include "utility.h" //#include "rw.h" // #pragma GCC diagnostic push @@ -1349,4 +1350,74 @@ void decompressDataSeries_float_1D_pwrgroup(float** data, size_t dataSeriesLengt free(groupErrorBounds); free(groupID); } + +void decompressDataSeries_float_1D_pwr_pre_log(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps) { + + decompressDataSeries_float_1D(data, dataSeriesLength, tdps); + float threshold = tdps->minLogValue; + if(tdps->pwrErrBoundBytes_size > 0){ + unsigned char * signs; + sz_lossless_decompress(confparams_dec->losslessCompressor, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); + for(size_t i=0; i<dataSeriesLength; i++){ + if((*data)[i] < threshold) (*data)[i] = 0; + else (*data)[i] = exp2((*data)[i]); + if(signs[i]) (*data)[i] = -((*data)[i]); + } + free(signs); + } + else{ + for(size_t i=0; i<dataSeriesLength; i++){ + if((*data)[i] < threshold) (*data)[i] = 0; + else (*data)[i] = exp2((*data)[i]); + } + } + +} + +void decompressDataSeries_float_2D_pwr_pre_log(float** data, size_t r1, size_t r2, TightDataPointStorageF* tdps) { + + size_t dataSeriesLength = r1 * r2; + decompressDataSeries_float_2D(data, r1, r2, tdps); + float threshold = tdps->minLogValue; + if(tdps->pwrErrBoundBytes_size > 0){ + unsigned char * signs; + sz_lossless_decompress(confparams_dec->losslessCompressor, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); + for(size_t i=0; i<dataSeriesLength; i++){ + if((*data)[i] < threshold) (*data)[i] = 0; + else (*data)[i] = exp2((*data)[i]); + if(signs[i]) (*data)[i] = -((*data)[i]); + } + free(signs); + } + else{ + for(size_t i=0; i<dataSeriesLength; i++){ + if((*data)[i] < threshold) (*data)[i] = 0; + else (*data)[i] = exp2((*data)[i]); + } + } + +} + +void decompressDataSeries_float_3D_pwr_pre_log(float** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageF* tdps) { + + size_t dataSeriesLength = r1 * r2 * r3; + decompressDataSeries_float_3D(data, r1, r2, r3, tdps); + float threshold = tdps->minLogValue; + if(tdps->pwrErrBoundBytes_size > 0){ + unsigned char * signs; + sz_lossless_decompress(confparams_dec->losslessCompressor, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); + for(size_t i=0; i<dataSeriesLength; i++){ + if((*data)[i] < threshold) (*data)[i] = 0; + else (*data)[i] = exp2((*data)[i]); + if(signs[i]) (*data)[i] = -((*data)[i]); + } + free(signs); + } + else{ + for(size_t i=0; i<dataSeriesLength; i++){ + if((*data)[i] < threshold) (*data)[i] = 0; + else (*data)[i] = exp2((*data)[i]); + } + } +} #pragma GCC diagnostic pop diff --git a/thirdparty/SZ/sz/src/szd_int16.c 
b/thirdparty/SZ/sz/src/szd_int16.c index 3c402dc..1198e05 100644 --- a/thirdparty/SZ/sz/src/szd_int16.c +++ b/thirdparty/SZ/sz/src/szd_int16.c @@ -15,6 +15,7 @@ #include "sz.h" #include "szd_int16.h" #include "Huffman.h" +#include "utility.h" /** * @@ -32,10 +33,10 @@ int SZ_decompress_args_int16(int16_t** newData, size_t r5, size_t r4, size_t r3, size_t i, tmpSize = 3+MetaDataByteLength+1+sizeof(int16_t)+exe_params->SZ_SIZE_TYPE; unsigned char* szTmpBytes; - if(cmpSize!=4+2+4+MetaDataByteLength && cmpSize!=4+2+8+MetaDataByteLength) + if(cmpSize!=4+2+4+MetaDataByteLength && cmpSize!=4+2+8+MetaDataByteLength) { - int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]); - if(isZlib) + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->losslessCompressor!=-1) confparams_dec->szMode = SZ_BEST_COMPRESSION; else confparams_dec->szMode = SZ_BEST_SPEED; @@ -48,7 +49,7 @@ int SZ_decompress_args_int16(int16_t** newData, size_t r5, size_t r4, size_t r3, { if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; - tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); //memcpy(szTmpBytes, tmpBytes, tmpSize); //free(tmpBytes); //release useless memory diff --git a/thirdparty/SZ/sz/src/szd_int32.c b/thirdparty/SZ/sz/src/szd_int32.c index 43dc74e..b5f31b0 100644 --- a/thirdparty/SZ/sz/src/szd_int32.c +++ b/thirdparty/SZ/sz/src/szd_int32.c @@ -15,6 +15,7 @@ #include "sz.h" #include "szd_int32.h" #include "Huffman.h" +#include "utility.h" /** * @@ -34,8 +35,8 @@ int SZ_decompress_args_int32(int32_t** newData, size_t r5, size_t r4, size_t r3, if(cmpSize!=4+4+4+MetaDataByteLength && cmpSize!=4+4+8+MetaDataByteLength) { - int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]); - if(isZlib) + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->losslessCompressor!=-1) confparams_dec->szMode = SZ_BEST_COMPRESSION; else confparams_dec->szMode = SZ_BEST_SPEED; @@ -48,7 +49,7 @@ int SZ_decompress_args_int32(int32_t** newData, size_t r5, size_t r4, size_t r3, { if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; - tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 
3+4+1+targetUncompressSize //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); //memcpy(szTmpBytes, tmpBytes, tmpSize); //free(tmpBytes); //release useless memory diff --git a/thirdparty/SZ/sz/src/szd_int64.c b/thirdparty/SZ/sz/src/szd_int64.c index aaa4a53..07a054f 100644 --- a/thirdparty/SZ/sz/src/szd_int64.c +++ b/thirdparty/SZ/sz/src/szd_int64.c @@ -15,6 +15,7 @@ #include "sz.h" #include "szd_int64.h" #include "Huffman.h" +#include "utility.h" /** * @@ -34,8 +35,8 @@ int SZ_decompress_args_int64(int64_t** newData, size_t r5, size_t r4, size_t r3, if(cmpSize!=4+8+4+MetaDataByteLength && cmpSize!=4+8+8+MetaDataByteLength) { - int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]); - if(isZlib) + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->losslessCompressor!=-1) confparams_dec->szMode = SZ_BEST_COMPRESSION; else confparams_dec->szMode = SZ_BEST_SPEED; @@ -48,7 +49,7 @@ int SZ_decompress_args_int64(int64_t** newData, size_t r5, size_t r4, size_t r3, { if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; - tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); //memcpy(szTmpBytes, tmpBytes, tmpSize); //free(tmpBytes); //release useless memory diff --git a/thirdparty/SZ/sz/src/szd_int8.c b/thirdparty/SZ/sz/src/szd_int8.c index 758e917..850b459 100644 --- a/thirdparty/SZ/sz/src/szd_int8.c +++ b/thirdparty/SZ/sz/src/szd_int8.c @@ -15,6 +15,7 @@ #include "sz.h" #include "szd_int8.h" #include "Huffman.h" +#include "utility.h" /** * @@ -34,8 +35,8 @@ int SZ_decompress_args_int8(int8_t** newData, size_t r5, size_t r4, size_t r3, s if(cmpSize!=4+1+4+MetaDataByteLength && cmpSize!=4+1+8+MetaDataByteLength) { - int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]); - if(isZlib) + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->losslessCompressor!=-1) confparams_dec->szMode = SZ_BEST_COMPRESSION; else confparams_dec->szMode = SZ_BEST_SPEED; @@ -48,7 +49,7 @@ int SZ_decompress_args_int8(int8_t** newData, size_t r5, size_t r4, size_t r3, s { if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; - tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless 
compression mode is actually 3+4+1+targetUncompressSize //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); //memcpy(szTmpBytes, tmpBytes, tmpSize); //free(tmpBytes); //release useless memory diff --git a/thirdparty/SZ/sz/src/szd_uint16.c b/thirdparty/SZ/sz/src/szd_uint16.c index bdc7469..551eecf 100644 --- a/thirdparty/SZ/sz/src/szd_uint16.c +++ b/thirdparty/SZ/sz/src/szd_uint16.c @@ -15,6 +15,7 @@ #include "sz.h" #include "szd_uint16.h" #include "Huffman.h" +#include "utility.h" /** * @@ -34,8 +35,8 @@ int SZ_decompress_args_uint16(uint16_t** newData, size_t r5, size_t r4, size_t r if(cmpSize!=4+2+4+MetaDataByteLength && cmpSize!=4+2+8+MetaDataByteLength) { - int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]); - if(isZlib) + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->losslessCompressor!=-1) confparams_dec->szMode = SZ_BEST_COMPRESSION; else confparams_dec->szMode = SZ_BEST_SPEED; @@ -48,7 +49,7 @@ int SZ_decompress_args_uint16(uint16_t** newData, size_t r5, size_t r4, size_t r { if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; - tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); //memcpy(szTmpBytes, tmpBytes, tmpSize); //free(tmpBytes); //release useless memory diff --git a/thirdparty/SZ/sz/src/szd_uint32.c b/thirdparty/SZ/sz/src/szd_uint32.c index 795eabe..04e8049 100644 --- a/thirdparty/SZ/sz/src/szd_uint32.c +++ b/thirdparty/SZ/sz/src/szd_uint32.c @@ -15,6 +15,7 @@ #include "sz.h" #include "szd_uint32.h" #include "Huffman.h" +#include "utility.h" /** * @@ -34,8 +35,8 @@ int SZ_decompress_args_uint32(uint32_t** newData, size_t r5, size_t r4, size_t r if(cmpSize!=4+4+4+MetaDataByteLength && cmpSize!=4+4+8+MetaDataByteLength) { - int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]); - if(isZlib) + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->losslessCompressor!=-1) confparams_dec->szMode = SZ_BEST_COMPRESSION; else confparams_dec->szMode = SZ_BEST_SPEED; @@ -48,7 +49,7 @@ int SZ_decompress_args_uint32(uint32_t** newData, size_t r5, size_t r4, size_t r { if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; - tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: 
consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); //memcpy(szTmpBytes, tmpBytes, tmpSize); //free(tmpBytes); //release useless memory diff --git a/thirdparty/SZ/sz/src/szd_uint64.c b/thirdparty/SZ/sz/src/szd_uint64.c index df2b838..84d5716 100644 --- a/thirdparty/SZ/sz/src/szd_uint64.c +++ b/thirdparty/SZ/sz/src/szd_uint64.c @@ -15,6 +15,7 @@ #include "sz.h" #include "szd_uint64.h" #include "Huffman.h" +#include "utility.h" /** * @@ -34,8 +35,8 @@ int SZ_decompress_args_uint64(uint64_t** newData, size_t r5, size_t r4, size_t r if(cmpSize!=4+8+4+MetaDataByteLength && cmpSize!=4+8+8+MetaDataByteLength) { - int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]); - if(isZlib) + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->losslessCompressor!=-1) confparams_dec->szMode = SZ_BEST_COMPRESSION; else confparams_dec->szMode = SZ_BEST_SPEED; @@ -48,7 +49,7 @@ int SZ_decompress_args_uint64(uint64_t** newData, size_t r5, size_t r4, size_t r { if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; - tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); //memcpy(szTmpBytes, tmpBytes, tmpSize); //free(tmpBytes); //release useless memory diff --git a/thirdparty/SZ/sz/src/szd_uint8.c b/thirdparty/SZ/sz/src/szd_uint8.c index cd61635..8b992bc 100644 --- a/thirdparty/SZ/sz/src/szd_uint8.c +++ b/thirdparty/SZ/sz/src/szd_uint8.c @@ -15,6 +15,7 @@ #include "sz.h" #include "szd_uint8.h" #include "Huffman.h" +#include "utility.h" /** * @@ -34,8 +35,8 @@ int SZ_decompress_args_uint8(uint8_t** newData, size_t r5, size_t r4, size_t r3, if(cmpSize!=4+1+4+MetaDataByteLength && cmpSize!=4+1+8+MetaDataByteLength) { - int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]); - if(isZlib) + confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); + if(confparams_dec->losslessCompressor!=-1) confparams_dec->szMode = SZ_BEST_COMPRESSION; else confparams_dec->szMode = SZ_BEST_SPEED; @@ -48,7 +49,7 @@ int SZ_decompress_args_uint8(uint8_t** newData, size_t r5, size_t r4, size_t r3, { if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; - tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize + tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned 
long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
 	//szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize);
 	//memcpy(szTmpBytes, tmpBytes, tmpSize);
 	//free(tmpBytes); //release useless memory
diff --git a/thirdparty/SZ/sz/src/szf.c b/thirdparty/SZ/sz/src/szf.c
index e3cca0b..43fe0b1 100644
--- a/thirdparty/SZ/sz/src/szf.c
+++ b/thirdparty/SZ/sz/src/szf.c
@@ -176,70 +176,70 @@ void sz_compress_d5_double_rev_(double* data, double *reservedValue, unsigned ch
 void sz_compress_d1_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1)
 {
-	unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, 0, 0, 0, 0, *r1);
+	unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, 0, *r1);
 	memcpy(bytes, tmp_bytes, *outSize);
 	free(tmp_bytes);
 }
 
 void sz_compress_d2_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2)
 {
-	unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, 0, 0, 0, *r2, *r1);
+	unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, *r2, *r1);
 	memcpy(bytes, tmp_bytes, *outSize);
 	free(tmp_bytes);
 }
 
 void sz_compress_d3_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3)
 {
-	unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, 0, 0, *r3, *r2, *r1);
+	unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, *r3, *r2, *r1);
 	memcpy(bytes, tmp_bytes, *outSize);
 	free(tmp_bytes);
 }
 
 void sz_compress_d4_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4)
 {
-	unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, 0, *r4, *r3, *r2, *r1);
+	unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, *r4, *r3, *r2, *r1);
 	memcpy(bytes, tmp_bytes, *outSize);
 	free(tmp_bytes);
 }
 
 void sz_compress_d5_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5)
 {
-	unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, *r5, *r4, *r3, *r2, *r1);
+	unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, *r5, *r4, *r3, *r2, *r1);
 	memcpy(bytes, tmp_bytes, *outSize);
 	free(tmp_bytes);
 }
 
 void sz_compress_d1_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1)
 {
-	unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, 0, 0, 0, 0, *r1);
+	unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, 0, *r1);
 	memcpy(bytes, tmp_bytes, *outSize);
 	free(tmp_bytes);
 }
 
 void sz_compress_d2_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2)
 {
-	unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, 0, 0, 0, *r2, *r1);
+	unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, *r2, *r1);
 	memcpy(bytes, tmp_bytes, *outSize);
 	free(tmp_bytes);
 }
 
 void sz_compress_d3_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3)
 {
-	unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, 0, 0, *r3, *r2, *r1);
+	unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, *r3, *r2, *r1);
 	memcpy(bytes, tmp_bytes, *outSize);
 	free(tmp_bytes);
 }
 
 void sz_compress_d4_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4)
 {
-	unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, 0, *r4, *r3, *r2, *r1);
+	unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, *r4, *r3, *r2, *r1);
 	memcpy(bytes, tmp_bytes, *outSize);
 	free(tmp_bytes);
 }
 
 void sz_compress_d5_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5)
 {
-	unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, *r5, *r4, *r3, *r2, *r1);
+	unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, *r5, *r4, *r3, *r2, *r1);
 	memcpy(bytes, tmp_bytes, *outSize);
 	free(tmp_bytes);
 }
 
@@ -411,7 +411,7 @@ void sz_batchaddvar_d1_float_(char* varName, int *len, float* data, int *errBoun
 	for(i=0;i<*len;i++)
 		s2[i]=varName[i];
 	s2[*len]='\0';
-	SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0, 0, 0, 0, *r1);
+	SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, 0, *r1);
 }
 void sz_batchaddvar_d2_float_(char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2)
 {
@@ -420,7 +420,7 @@ void sz_batchaddvar_d2_float_(char* varName, int *len, float* data, int *errBoun
 	for(i=0;i<*len;i++)
 		s2[i]=varName[i];
 	s2[*len]='\0';
-	SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0, 0, 0, *r2, *r1);
+	SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, *r2, *r1);
 }
 void sz_batchaddvar_d3_float_(char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3)
 {
@@ -429,7 +429,7 @@ void sz_batchaddvar_d3_float_(char* varName, int *len, float* data, int *errBoun
 	for(i=0;i<*len;i++)
 		s2[i]=varName[i];
 	s2[*len]='\0';
-	SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0, 0, *r3, *r2, *r1);
+	SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, *r3, *r2, *r1);
 }
 void sz_batchaddvar_d4_float_(char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4)
 {
@@ -438,7 +438,7 @@ void sz_batchaddvar_d4_float_(char* varName, int *len, float* data, int *errBoun
 	for(i=0;i<*len;i++)
 		s2[i]=varName[i];
 	s2[*len]='\0';
-	SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0, *r4, *r3, *r2, *r1);
+	SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, *r4, *r3, *r2, *r1);
 }
 void sz_batchaddvar_d5_float_(char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5)
 {
@@ -447,7 +447,7 @@ void sz_batchaddvar_d5_float_(char* varName, int *len, float* data, int *errBoun
 	for(i=0;i<*len;i++)
 		s2[i]=varName[i];
 	s2[*len]='\0';
-	SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, *r5, *r4, *r3, *r2, *r1);
+	SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, *r5, *r4, *r3, *r2, *r1);
 }
 void sz_batchaddvar_d1_double_(char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1)
 {
@@ -456,7 +456,7 @@ void sz_batchaddvar_d1_double_(char* varName, int *len, double* data, int *errBo
 	for(i=0;i<*len;i++)
 		s2[i]=varName[i];
 	s2[*len]='\0';
-	SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0, 0, 0, 0, *r1);
+	SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, 0, *r1);
 }
 void sz_batchaddvar_d2_double_(char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2)
 {
@@ -465,7 +465,7 @@ void sz_batchaddvar_d2_double_(char* varName, int *len, double* data, int *errBo
 	for(i=0;i<*len;i++)
 		s2[i]=varName[i];
 	s2[*len]='\0';
-	SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0, 0, 0, *r2, *r1);
+	SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, *r2, *r1);
 }
 void sz_batchaddvar_d3_double_(char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3)
 {
@@ -474,7 +474,7 @@ void sz_batchaddvar_d3_double_(char* varName, int *len, double* data, int *errBo
 	for(i=0;i<*len;i++)
 		s2[i]=varName[i];
 	s2[*len]='\0';
-	SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0, 0, *r3, *r2, *r1);
+	SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, *r3, *r2, *r1);
 }
 void sz_batchaddvar_d4_double_(char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4)
 {
@@ -483,7 +483,7 @@ void sz_batchaddvar_d4_double_(char* varName, int *len, double* data, int *errBo
 	for(i=0;i<*len;i++)
 		s2[i]=varName[i];
 	s2[*len]='\0';
-	SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0, *r4, *r3, *r2, *r1);
+	SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, *r4, *r3, *r2, *r1);
 }
 void sz_batchaddvar_d5_double_(char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5)
 {
@@ -492,7 +492,7 @@ void sz_batchaddvar_d5_double_(char* varName, int *len, double* data, int *errBo
 	for(i=0;i<*len;i++)
 		s2[i]=varName[i];
 	s2[*len]='\0';
-	SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, *r5, *r4, *r3, *r2, *r1);
+	SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, *r5, *r4, *r3, *r2, *r1);
 }
 void sz_batchdelvar_c_(char* varName, int *len, int *errState)
 {
@@ -503,15 +503,18 @@ void sz_batchdelvar_c_(char* varName, int *len, int *errState)
 	s2[*len]='\0';
 	*errState = SZ_batchDelVar(s2);
 }
+
+/*@deprecated*/
 void sz_batch_compress_c_(unsigned char* bytes, size_t *outSize)
 {
-	unsigned char* tmp_bytes = SZ_batch_compress(outSize);
-	memcpy(bytes, tmp_bytes, *outSize);
-	free(tmp_bytes);
+	//unsigned char* tmp_bytes = SZ_batch_compress(outSize);
+	//memcpy(bytes, tmp_bytes, *outSize);
+	//free(tmp_bytes);
}
+/*@deprecated*/
 void sz_batch_decompress_c_(unsigned char* bytes, size_t *byteLength, int *ierr)
 {
-	SZ_batch_decompress(bytes, *byteLength, ierr);
+	//SZ_batch_decompress(bytes, *byteLength, ierr);
 }
 
 void sz_getvardim_c_(char* varName, int *len, int *dim, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5)
-- 
GitLab
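
Note for callers adapting to this import (not part of the patch): the szf.c hunks above drop the extra integer argument that previously sat between the 0.1 power-bound ratio and the dimension sizes in every SZ_compress_args call, and insert a power-bound ratio (0.1) in that same position in every SZ_batchAddVar call. The following is a minimal sketch of the resulting 12-argument SZ_compress_args call for a 1-D float array, mirroring sz_compress_d1_float_args_; the parameter roles are inferred from the call sites in this patch rather than quoted from sz.h, and the ABS error-bound mode, 1E-4 bound, and 1024-element buffer are illustrative assumptions only.

	/* Sketch only: compress a 1-D float buffer with the post-patch argument
	 * list. Assumes sz.h provides SZ_Init/SZ_Finalize and the SZ_FLOAT and
	 * ABS constants, as used elsewhere in this repository. */
	#include <stdio.h>
	#include <stdlib.h>
	#include "sz.h"

	int main(void)
	{
		size_t r1 = 1024, outSize = 0;          /* illustrative 1-D length */
		float *data = malloc(r1 * sizeof(float));
		for (size_t i = 0; i < r1; i++)
			data[i] = (float)i * 0.001f;

		SZ_Init(NULL);                          /* or pass a path to an sz.config file */

		/* Absolute error bound 1E-4; the relative and power-bound ratios
		 * (1E-3 and 0.1 here) are expected to be unused in ABS mode. The
		 * trailing sizes are r5..r1, so an unused dimension is passed as 0. */
		unsigned char *bytes = SZ_compress_args(SZ_FLOAT, data, &outSize,
				ABS, 1E-4, 1E-3, 0.1,
				0, 0, 0, 0, r1);

		printf("compressed %zu floats into %zu bytes\n", r1, outSize);

		free(bytes);
		free(data);
		SZ_Finalize();
		return 0;
	}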