/ Check-in [7eb022d7]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Instead of the 4-byte fields, use regular varints for the poslist-size field in fts5_hash.c.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 7eb022d7e5fdb180af823c82c47c938e4a7a355f
User & Date: dan 2015-02-25 19:24:37
Context
2015-02-26
14:54
Fix an fts5 bug in large incremental merges. check-in: 208e3cb6 user: dan tags: fts5
2015-02-25
19:24
Instead of the 4-byte fields, use regular varints for the poslist-size field in fts5_hash.c. check-in: 7eb022d7 user: dan tags: fts5
2015-02-02
11:58
Ensure generated header file fts5parse.h is included in sqlite3.c. check-in: bc7be2fc user: dan tags: fts5
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5Int.h.

   347    347   int sqlite3Fts5IndexReinit(Fts5Index *p);
   348    348   int sqlite3Fts5IndexOptimize(Fts5Index *p);
   349    349   
   350    350   int sqlite3Fts5IndexLoadConfig(Fts5Index *p);
   351    351   
   352    352   int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v);
   353    353   #define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b)
          354  +
          355  +int sqlite3Fts5GetVarintLen(u32 iVal);
   354    356   
   355    357   /*
   356    358   ** End of interface to code in fts5_index.c.
   357    359   **************************************************************************/
   358    360   
   359    361   /**************************************************************************
   360    362   ** Interface to code in fts5_hash.c. 

Changes to ext/fts5/fts5_hash.c.

   179    179     }
   180    180   
   181    181     sqlite3_free(apOld);
   182    182     pHash->nSlot = nNew;
   183    183     pHash->aSlot = apNew;
   184    184     return SQLITE_OK;
   185    185   }
          186  +
          187  +static void fts5HashAddPoslistSize(Fts5HashEntry *p){
                  /*
                  ** Fill in the size field located at byte offset p->iSzPoslist
                  ** within the entry (offsets are measured from the start of the
                  ** Fts5HashEntry itself). If p->iSzPoslist is zero the call is
                  ** a no-op.
                  */
          188  +  if( p->iSzPoslist ){
          189  +    u8 *pPtr = (u8*)p;
                    /* Number of bytes that follow the single byte at iSzPoslist */
          190  +    int nSz = p->nData - p->iSzPoslist - 1;
          191  +
          192  +    if( nSz<=127 ){
                      /* Small value: store it directly in the existing byte */
          193  +      pPtr[p->iSzPoslist] = nSz;
          194  +    }else{
                      /* Larger value: the size needs nByte bytes. Shift the nSz
                      ** trailing bytes forward by (nByte-1) to widen the slot, write
                      ** the varint into it, and grow nData by the same amount.
                      ** memmove() is used because source and destination overlap.
                      **
                      ** NOTE(review): no check against p->nAlloc is made here;
                      ** presumably callers guarantee enough free space - confirm. */
          195  +      int nByte = sqlite3Fts5GetVarintLen((u32)nSz);
          196  +      memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz);
          197  +      sqlite3PutVarint(&pPtr[p->iSzPoslist], nSz);
          198  +      p->nData += (nByte-1);
          199  +    }
                    /* Clear the offset so that a repeated call does nothing */
          200  +    p->iSzPoslist = 0;
          201  +  }
          202  +}
   186    203   
   187    204   int sqlite3Fts5HashWrite(
   188    205     Fts5Hash *pHash,
   189    206     i64 iRowid,                     /* Rowid for this entry */
   190    207     int iCol,                       /* Column token appears in (-ve -> delete) */
   191    208     int iPos,                       /* Position of token within column */
   192    209     const char *pToken, int nToken  /* Token to add or remove to or from index */
................................................................................
   217    234       memset(p, 0, sizeof(Fts5HashEntry));
   218    235       p->nAlloc = nByte;
   219    236       memcpy(p->zKey, pToken, nToken);
   220    237       p->zKey[nToken] = '\0';
   221    238       p->nData = nToken + 1 + sizeof(Fts5HashEntry);
   222    239       p->nData += sqlite3PutVarint(&((u8*)p)[p->nData], iRowid);
   223    240       p->iSzPoslist = p->nData;
   224         -    p->nData += 4;
          241  +    p->nData += 1;
   225    242       p->iRowid = iRowid;
   226    243       p->pHashNext = pHash->aSlot[iHash];
   227    244       pHash->aSlot[iHash] = p;
   228    245       pHash->nEntry++;
   229    246       nIncr += p->nData;
   230    247     }
   231    248   
   232    249     /* Check there is enough space to append a new entry. Worst case scenario
   233    250     ** is:
   234    251     **
   235    252     **     + 9 bytes for a new rowid,
   236         -  **     + 4 bytes reserved for the "poslist size" varint.
          253  +  **     + 4 byte reserved for the "poslist size" varint.
   237    254     **     + 1 byte for a "new column" byte,
   238    255     **     + 3 bytes for a new column number (16-bit max) as a varint,
   239    256     **     + 5 bytes for the new position offset (32-bit max).
   240    257     */
   241    258     if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){
   242    259       int nNew = p->nAlloc * 2;
   243    260       Fts5HashEntry *pNew;
................................................................................
   251    268     }
   252    269     pPtr = (u8*)p;
   253    270     nIncr -= p->nData;
   254    271   
   255    272     /* If this is a new rowid, append the 4-byte size field for the previous
   256    273     ** entry, and the new rowid for this entry.  */
   257    274     if( iRowid!=p->iRowid ){
   258         -    assert( p->iSzPoslist>0 );
   259         -    fts5Put4ByteVarint(&pPtr[p->iSzPoslist], p->nData - p->iSzPoslist - 4);
          275  +    fts5HashAddPoslistSize(p);
   260    276       p->nData += sqlite3PutVarint(&pPtr[p->nData], iRowid - p->iRowid);
   261    277       p->iSzPoslist = p->nData;
   262         -    p->nData += 4;
          278  +    p->nData += 1;
   263    279       p->iCol = 0;
   264    280       p->iPos = 0;
   265    281       p->iRowid = iRowid;
   266    282     }
   267    283   
   268    284     if( iCol>=0 ){
   269    285       /* Append a new column value, if necessary */
................................................................................
   389    405   
   390    406     rc = fts5HashEntrySort(pHash, 0, 0, &pList);
   391    407     if( rc==SQLITE_OK ){
   392    408       memset(pHash->aSlot, 0, sizeof(Fts5HashEntry*) * pHash->nSlot);
   393    409       while( pList ){
   394    410         Fts5HashEntry *pNext = pList->pScanNext;
   395    411         if( rc==SQLITE_OK ){
   396         -        const int nSz = pList->nData - pList->iSzPoslist - 4;
   397    412           const int nKey = strlen(pList->zKey);
   398    413           i64 iRowid = 0;
   399    414           u8 *pPtr = (u8*)pList;
   400    415           int iOff = sizeof(Fts5HashEntry) + nKey + 1;
   401    416   
   402    417           /* Fill in the final poslist size field */
   403         -        fts5Put4ByteVarint(&pPtr[pList->iSzPoslist], nSz);
          418  +        fts5HashAddPoslistSize(pList);
   404    419           
   405    420           /* Issue the new-term callback */
   406    421           rc = xTerm(pCtx, pList->zKey, nKey);
   407    422   
   408    423           /* Issue the xEntry callbacks */
   409    424           while( rc==SQLITE_OK && iOff<pList->nData ){
   410    425             i64 iDelta;             /* Rowid delta value */
   411    426             int nPoslist;           /* Size of position list in bytes */
   412    427             int nVarint;
   413    428             iOff += getVarint(&pPtr[iOff], (u64*)&iDelta);
   414    429             iRowid += iDelta;
   415         -          nPoslist = fts5Get4ByteVarint(&pPtr[iOff], &nVarint);
   416         -          iOff += 4;
   417         -          rc = xEntry(pCtx, iRowid, &pPtr[iOff-nVarint], nPoslist+nVarint);
   418         -          iOff += nPoslist;
          430  +          nVarint = fts5GetVarint32(&pPtr[iOff], nPoslist);
          431  +          rc = xEntry(pCtx, iRowid, &pPtr[iOff], nPoslist+nVarint);
          432  +          iOff += nVarint+nPoslist;
   419    433           }
   420    434   
   421    435           /* Issue the term-done callback */
   422    436           if( rc==SQLITE_OK ) rc = xTermDone(pCtx);
   423    437         }
   424    438         sqlite3_free(pList);
   425    439         pList = pNext;
................................................................................
   441    455     Fts5HashEntry *p;
   442    456   
   443    457     for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
   444    458       if( memcmp(p->zKey, pTerm, nTerm)==0 && p->zKey[nTerm]==0 ) break;
   445    459     }
   446    460   
   447    461     if( p ){
   448         -    u8 *pPtr = (u8*)p;
   449         -    fts5Put4ByteVarint(&pPtr[p->iSzPoslist], p->nData - p->iSzPoslist - 4);
          462  +    fts5HashAddPoslistSize(p);
   450    463       *ppDoclist = &p->zKey[nTerm+1];
   451    464       *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1);
   452    465     }else{
   453    466       *ppDoclist = 0;
   454    467       *pnDoclist = 0;
   455    468     }
   456    469   
................................................................................
   478    491     Fts5Hash *pHash,
   479    492     const char **pzTerm,            /* OUT: term (nul-terminated) */
   480    493     const char **ppDoclist,         /* OUT: pointer to doclist */
   481    494     int *pnDoclist                  /* OUT: size of doclist in bytes */
   482    495   ){
   483    496     Fts5HashEntry *p;
   484    497     if( (p = pHash->pScan) ){
   485         -    u8 *pPtr = (u8*)p;
   486    498       int nTerm = strlen(p->zKey);
   487         -    fts5Put4ByteVarint(&pPtr[p->iSzPoslist], p->nData - p->iSzPoslist - 4);
          499  +    fts5HashAddPoslistSize(p);
   488    500       *pzTerm = p->zKey;
   489    501       *ppDoclist = &p->zKey[nTerm+1];
   490    502       *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1);
   491    503     }else{
   492    504       *pzTerm = 0;
   493    505       *ppDoclist = 0;
   494    506       *pnDoclist = 0;
   495    507     }
   496    508   }
   497    509   

Changes to ext/fts5/fts5_index.c.

   367    367   
   368    368   /*
   369    369   ** An object of type Fts5SegWriter is used to write to segments.
   370    370   */
   371    371   struct Fts5PageWriter {
   372    372     int pgno;                       /* Page number for this page */
   373    373     Fts5Buffer buf;                 /* Buffer containing page data */
   374         -  Fts5Buffer term;              /* Buffer containing previous term on page */
          374  +  Fts5Buffer term;                /* Buffer containing previous term on page */
   375    375   };
   376    376   struct Fts5SegWriter {
   377    377     int iIdx;                       /* Index to write to */
   378    378     int iSegid;                     /* Segid to write to */
   379    379     int nWriter;                    /* Number of entries in aWriter */
   380    380     Fts5PageWriter *aWriter;        /* Array of PageWriter objects */
   381    381     i64 iPrevRowid;                 /* Previous docid written to current leaf */
................................................................................
   662    662       p -= 2;
   663    663       n = sqlite3GetVarint(p, &v64);
   664    664       *v = (u32)v64;
   665    665       assert( n>3 && n<=9 );
   666    666       return n;
   667    667     }
   668    668   }
          669  +
          670  +int sqlite3Fts5GetVarintLen(u32 iVal){
                  /*
                  ** Return a length between 1 and 5 for the 32-bit value iVal.
                  ** Going by the function name, this is the number of bytes a
                  ** varint encoding of iVal occupies. Each threshold below is 7
                  ** bits wider than the previous one (2^7, 2^14, 2^21, 2^28);
                  ** any value of 2^28 or more yields 5.
                  */
          671  +  if( iVal<(1 << 7 ) ) return 1;
          672  +  if( iVal<(1 << 14) ) return 2;
          673  +  if( iVal<(1 << 21) ) return 3;
          674  +  if( iVal<(1 << 28) ) return 4;
          675  +  return 5;
          676  +}
   669    677   
   670    678   /*
   671    679   ** Allocate and return a buffer at least nByte bytes in size.
   672    680   **
   673    681   ** If an OOM error is encountered, return NULL and set the error code in
   674    682   ** the Fts5Index handle passed as the first argument.
   675    683   */

Changes to ext/fts5/tool/loadfts5.tcl.

    32     32   proc usage {} {
                  # Print the command synopsis and the list of supported switches
                  # to stderr, then terminate the script with exit status 1.
    33     33     puts stderr "Usage: $::argv0 ?SWITCHES? DATABASE PATH"
    34     34     puts stderr ""
    35     35     puts stderr "Switches are:"
    36     36     puts stderr "  -fts4        (use fts4 instead of fts5)"
    37     37     puts stderr "  -fts5        (use fts5)"
    38     38     puts stderr "  -porter      (use porter tokenizer)"
           39  +  puts stderr "  -delete      (delete the database file before starting)"
    39     40     puts stderr "  -limit N     (load no more than N documents)"
    40     41     puts stderr "  -automerge N (set the automerge parameter to N)"
    41     42     puts stderr "  -crisismerge N (set the crisismerge parameter to N)"
    42     43     exit 1
    43     44   }
    44     45   
    45     46   set O(vtab)       fts5
    46     47   set O(tok)        ""
    47     48   set O(limit)      0
           49  +set O(delete)     0
    48     50   set O(automerge)  -1
    49     51   set O(crisismerge)  -1
    50     52   
    51     53   if {[llength $argv]<2} usage
    52     54   set nOpt [expr {[llength $argv]-2}]
    53     55   for {set i 0} {$i < $nOpt} {incr i} {
    54     56     set arg [lindex $argv $i]
................................................................................
    60     62       -fts5 {
    61     63         set O(vtab) fts5
    62     64       }
    63     65   
    64     66       -porter {
    65     67         set O(tok) ", tokenize=porter"
    66     68       }
           69  +
           70  +    -delete {
           71  +      set O(delete) 1
           72  +    }
    67     73   
    68     74       -limit {
    69     75         if { [incr i]>=$nOpt } usage
    70     76         set O(limit) [lindex $argv $i]
    71     77       }
    72     78       
    73     79       -automerge {
................................................................................
    82     88   
    83     89       default {
    84     90         usage
    85     91       }
    86     92     }
    87     93   }
    88     94   
    89         -sqlite3 db [lindex $argv end-1]
           95  +set dbfile [lindex $argv end-1]
           96  +if {$O(delete)} { file delete -force $dbfile }
           97  +sqlite3 db $dbfile
    90     98   db func loadfile loadfile
    91     99   
    92    100   db transaction {
    93    101     catch {
    94    102       db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok))"
    95    103     }
    96    104     if {$O(automerge)>=0} {