SQLite

Check-in [4dbe0cba3f]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Modify the zonefile format in order to avoid depending on the filesize to determine the extent of the final frame. See README.md for details.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | zonefile
Files: files | file ages | folders
SHA3-256: 4dbe0cba3fad9a752834d795127cf35eed21fab63b18a48f75d5c1e96ca77447
User & Date: dan 2018-02-19 14:27:24.815
Context
2018-02-19
16:28
Add support for the ExtendedHeaderSize header field to zonefile. (check-in: 78267a0913 user: dan tags: zonefile)
14:27
Modify the zonefile format in order to avoid depending on the filesize to determine the extent of the final frame. See README.md for details. (check-in: 4dbe0cba3f user: dan tags: zonefile)
2018-02-17
20:22
Add support for "brotli" compression to the zonefile module. (check-in: 3eb25b3fa5 user: dan tags: zonefile)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/zonefile/README.md.
107
108
109
110
111
112
113










114
115
116
117
118
119
120
  *  Multi-byte integer values are big-endian.

  *  The offsets in the ZoneFileIndex.byteOffsetZoneFrame[] array are
     relative to the offset in ZoneFileHeader.byteOffsetFrames. This is
     necessary as we may not know the offset of the start of the frame data
     until after the ZoneFileIndex structure is compressed.











  *  Currently there is no support at all for encryption or compression.

  *  Zonefile currently uses json1 to parse the json argument to
     zonefile\_write(), and so must be used with an SQLITE\_ENABLE\_JSON1
     or otherwise json1-enabled SQLite.









>
>
>
>
>
>
>
>
>
>
|






107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
  *  Multi-byte integer values are big-endian.

  *  The offsets in the ZoneFileIndex.byteOffsetZoneFrame[] array are
     relative to the offset in ZoneFileHeader.byteOffsetFrames. This is
     necessary as we may not know the offset of the start of the frame data
     until after the ZoneFileIndex structure is compressed.

  *  The offsets in the ZoneFileIndex.byteOffsetZoneFrame[] array are the
     offsets for the first byte past the end of the corresponding frame.
     For example, byteOffsetZoneFrame[0] identifies the first byte of the
     second frame, and byteOffsetZoneFrame[numFrames-1] is one byte past
     the end of the last frame in the file.

     This is preferable because, if we stored the starting offset of each
     frame instead, there would be no way to determine the size of the last
     frame in the file without trusting the filesize itself.

  *  Currently there is no support at all for encryption.

  *  Zonefile currently uses json1 to parse the json argument to
     zonefile\_write(), and so must be used with an SQLITE\_ENABLE\_JSON1
     or otherwise json1-enabled SQLite.


Changes to ext/zonefile/zonefile.c.
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
  /* Prepare the SQL statement used to read data from the source table. This
  ** also serves to verify the suitability of the source table schema. */
  pStmt = zonefileCtxPrepare(pCtx, 
      "SELECT k, frame, v FROM %Q ORDER BY frame, idx, k", zTbl
  );
  if( pStmt==0 ) goto zone_write_out;

  /* Open a file-handle used to write out the zonefile */ 
  pFd = zonefileFileOpen(zFile, 1, &zErr);
  if( pFd==0 ){
    sqlite3_result_error(pCtx, zErr, -1);
    sqlite3_free(zErr);
    goto zone_write_out;
  }

  /* If the data compressor uses a global dictionary, create the dictionary
  ** now.  */
  if( sWrite.pCmpData->xTrain ){
    int nSample = 0;

    while( SQLITE_ROW==sqlite3_step(pStmt) ){
      int nByte = sqlite3_column_bytes(pStmt, 2);
      const u8 *aByte = (const u8*)sqlite3_column_blob(pStmt, 2);
      if( zonefileBufferGrow(pCtx, &sSample, nByte) ){







|








|







784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
  /* Prepare the SQL statement used to read data from the source table. This
  ** also serves to verify the suitability of the source table schema. */
  pStmt = zonefileCtxPrepare(pCtx, 
      "SELECT k, frame, v FROM %Q ORDER BY frame, idx, k", zTbl
  );
  if( pStmt==0 ) goto zone_write_out;

  /* Open the file-handle used to write out the zonefile */ 
  pFd = zonefileFileOpen(zFile, 1, &zErr);
  if( pFd==0 ){
    sqlite3_result_error(pCtx, zErr, -1);
    sqlite3_free(zErr);
    goto zone_write_out;
  }

  /* If the data compressor uses a global dictionary, create the dictionary
  ** and store it in buffer sDict.  */
  if( sWrite.pCmpData->xTrain ){
    int nSample = 0;

    while( SQLITE_ROW==sqlite3_step(pStmt) ){
      int nByte = sqlite3_column_bytes(pStmt, 2);
      const u8 *aByte = (const u8*)sqlite3_column_blob(pStmt, 2);
      if( zonefileBufferGrow(pCtx, &sSample, nByte) ){
843
844
845
846
847
848
849

850
851
852
853
854
855
856
857
858
859
860
861






862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883

884

885
886
887
888
889

890

891
892
893
894
895
896
897
  while( SQLITE_ROW==sqlite3_step(pStmt) ){
    sqlite3_int64 k = sqlite3_column_int64(pStmt, 0);
    sqlite3_value *pFrame = sqlite3_column_value(pStmt, 1);
    int nBlob = sqlite3_column_bytes(pStmt, 2);
    const u8 *pBlob = (const u8*)sqlite3_column_blob(pStmt, 2);

    int bAuto = zonefileIsAutoFrame(pFrame);

    if( zonefileCompareValue(pFrame, pPrev) 
     || (bAuto && sFrame.n && (sFrame.n+nBlob)>sWrite.maxAutoFrameSize)
    ){
      /* Add new entry to sFrame */
      if( zonefileBufferGrow(pCtx, &sFrameIdx, 4) 
       || zonefileAppendCompressed(pCtx, sWrite.pCmpData, pCmp, &sData, &sFrame)
      ){
        goto zone_write_out;
      }
      sFrame.n = 0;
      zonefileAppend32(&sFrameIdx, sData.n);
      sqlite3_value_free(pPrev);






      pPrev = sqlite3_value_dup(pFrame);
      if( pPrev==0 ){
        sqlite3_result_error_nomem(pCtx);
        goto zone_write_out;
      }
      nFrame++;
    }

    /* Add new entry to sKeyIdx */
    if( zonefileBufferGrow(pCtx, &sKeyIdx, ZONEFILE_SZ_KEYOFFSETS_ENTRY) ){
      goto zone_write_out;
    }
    zonefileAppend64(&sKeyIdx, k);
    zonefileAppend32(&sKeyIdx, nFrame-1);
    zonefileAppend32(&sKeyIdx, sFrame.n);
    zonefileAppend32(&sKeyIdx, nBlob);

    /* Add uncompressed data for new entry to sFrame */
    if( zonefileBufferGrow(pCtx, &sFrame, nBlob) ) goto zone_write_out;
    zonefileAppendBlob(&sFrame, pBlob, nBlob);
    nKey++;
  }

  if( sFrame.n>0

   && zonefileAppendCompressed(pCtx, sWrite.pCmpData, pCmp, &sData, &sFrame) 
  ){
    goto zone_write_out;
  }
  sqlite3_value_free(pPrev);

  pPrev = 0;


  /* If a compression method was specified, compress the key-index here */
  if( sWrite.pCmpIdx->eType!=ZONEFILE_COMPRESSION_NONE ){
    if( zonefileBufferGrow(pCtx, &sFrameIdx, sKeyIdx.n) ) goto zone_write_out;
    zonefileAppendBlob(&sFrameIdx, sKeyIdx.a, sKeyIdx.n);
    zonefileBufferFree(&sKeyIdx);
    rc = zonefileAppendCompressed(pCtx, sWrite.pCmpIdx, 0, &sKeyIdx,&sFrameIdx);







>
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>
>





<







|








>
|
>
|
|
|
|
<
>
|
>







843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873

874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896

897
898
899
900
901
902
903
904
905
906
  while( SQLITE_ROW==sqlite3_step(pStmt) ){
    sqlite3_int64 k = sqlite3_column_int64(pStmt, 0);
    sqlite3_value *pFrame = sqlite3_column_value(pStmt, 1);
    int nBlob = sqlite3_column_bytes(pStmt, 2);
    const u8 *pBlob = (const u8*)sqlite3_column_blob(pStmt, 2);

    int bAuto = zonefileIsAutoFrame(pFrame);
    if( sFrame.n>0 ){
      if( zonefileCompareValue(pFrame, pPrev) 
       || (bAuto && (sFrame.n+nBlob)>sWrite.maxAutoFrameSize)
      ){
        /* Add new entry to sFrame */
        if( zonefileBufferGrow(pCtx, &sFrameIdx, 4) 
         || zonefileAppendCompressed(pCtx, sWrite.pCmpData, pCmp,&sData,&sFrame)
        ){
          goto zone_write_out;
        }
        sFrame.n = 0;
        zonefileAppend32(&sFrameIdx, sData.n);
        sqlite3_value_free(pPrev);
        pPrev = 0;
        nFrame++;
      }
    }

    if( pPrev==0 ){
      pPrev = sqlite3_value_dup(pFrame);
      if( pPrev==0 ){
        sqlite3_result_error_nomem(pCtx);
        goto zone_write_out;
      }

    }

    /* Add new entry to sKeyIdx */
    if( zonefileBufferGrow(pCtx, &sKeyIdx, ZONEFILE_SZ_KEYOFFSETS_ENTRY) ){
      goto zone_write_out;
    }
    zonefileAppend64(&sKeyIdx, k);
    zonefileAppend32(&sKeyIdx, nFrame);
    zonefileAppend32(&sKeyIdx, sFrame.n);
    zonefileAppend32(&sKeyIdx, nBlob);

    /* Add uncompressed data for new entry to sFrame */
    if( zonefileBufferGrow(pCtx, &sFrame, nBlob) ) goto zone_write_out;
    zonefileAppendBlob(&sFrame, pBlob, nBlob);
    nKey++;
  }

  if( sFrame.n>0 ){
    if( zonefileBufferGrow(pCtx, &sFrameIdx, 4) 
     || zonefileAppendCompressed(pCtx, sWrite.pCmpData, pCmp, &sData, &sFrame)
    ){
      goto zone_write_out;
    }

    zonefileAppend32(&sFrameIdx, sData.n);
    nFrame++;
  }

  /* If a compression method was specified, compress the key-index here */
  if( sWrite.pCmpIdx->eType!=ZONEFILE_COMPRESSION_NONE ){
    if( zonefileBufferGrow(pCtx, &sFrameIdx, sKeyIdx.n) ) goto zone_write_out;
    zonefileAppendBlob(&sFrameIdx, sKeyIdx.a, sKeyIdx.n);
    zonefileBufferFree(&sKeyIdx);
    rc = zonefileAppendCompressed(pCtx, sWrite.pCmpIdx, 0, &sKeyIdx,&sFrameIdx);
929
930
931
932
933
934
935

936
937
938
939
940
941
942
    zonefileCtxError(pCtx, "error writing file \"%s\" (fclose())", zFile);
  }
  pFd = 0;

 zone_write_out:
  if( pCmp ) sWrite.pCmpData->xClose(pCmp);
  if( pFd ) fclose(pFd);

  sqlite3_finalize(pStmt);
  zonefileBufferFree(&sFrameIdx);
  zonefileBufferFree(&sKeyIdx);
  zonefileBufferFree(&sFrame);
  zonefileBufferFree(&sDict);
  zonefileBufferFree(&sData);
  zonefileBufferFree(&sSample);







>







938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
    zonefileCtxError(pCtx, "error writing file \"%s\" (fclose())", zFile);
  }
  pFd = 0;

 zone_write_out:
  if( pCmp ) sWrite.pCmpData->xClose(pCmp);
  if( pFd ) fclose(pFd);
  sqlite3_value_free(pPrev);
  sqlite3_finalize(pStmt);
  zonefileBufferFree(&sFrameIdx);
  zonefileBufferFree(&sKeyIdx);
  zonefileBufferFree(&sFrame);
  zonefileBufferFree(&sDict);
  zonefileBufferFree(&sData);
  zonefileBufferFree(&sSample);
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
    int iFrame = sqlite3_column_int(pCsr->pSelect, 2);
    u8 aSpace[8] = {0,0,0,0,0,0,0,0};
    u8 *aOff = aSpace;
    u8 *aFree = 0;
    if( hdr.compressionTypeIndexData ){
      int nFree = 0;
      rc = zonefileLoadIndex(&hdr, pFd, &aFree, &nFree, &zErr);
      if( rc==SQLITE_OK ) aOff = &aFree[4*iFrame];
    }else{
      rc = zonefileFileRead(pFd, aOff, 8, ZONEFILE_SZ_HEADER + 4 * iFrame);
    }
    iOff = zonefileGet32(aOff);
    if( iFrame+1<hdr.numFrames ){
      szFrame = zonefileGet32(&aOff[4]) - iOff;
    }else{
      fseek(pFd, 0, SEEK_END);
      szFrame = (u32)ftell(pFd) - iOff - hdr.byteOffsetFrames;
    }
    sqlite3_free(aFree);
  }

  /* Read some data into memory. If the data is uncompressed, then just
  ** the required record is read. Otherwise, the entire frame is read
  ** into memory.  */







|

|

|
|
|
<
<
|







1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927


1928
1929
1930
1931
1932
1933
1934
1935
    int iFrame = sqlite3_column_int(pCsr->pSelect, 2);
    u8 aSpace[8] = {0,0,0,0,0,0,0,0};
    u8 *aOff = aSpace;
    u8 *aFree = 0;
    if( hdr.compressionTypeIndexData ){
      int nFree = 0;
      rc = zonefileLoadIndex(&hdr, pFd, &aFree, &nFree, &zErr);
      if( rc==SQLITE_OK ) aOff = &aFree[4*(iFrame-1)];
    }else{
      rc = zonefileFileRead(pFd, aOff, 8, ZONEFILE_SZ_HEADER + 4 * (iFrame-1));
    }
    szFrame = zonefileGet32(&aOff[4]);
    if( iFrame>0 ){
      iOff = zonefileGet32(aOff);


      szFrame = szFrame - iOff;
    }
    sqlite3_free(aFree);
  }

  /* Read some data into memory. If the data is uncompressed, then just
  ** the required record is read. Otherwise, the entire frame is read
  ** into memory.  */