/ Check-in [0c0c4ae9]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Modify the fts5 leaf page format to permit faster seek operations. This is a file-format change. Any existing databases can be upgraded by running the fts5 'rebuild' command.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 0c0c4ae971e54efc526eed7bd071c90dfadb95ff
User & Date: dan 2015-09-10 17:23:37
Context
2015-09-10
19:22
Fix a potential NULL pointer deref in the testing logic of pcache1. NB: The -DSQLITE_TEST compile-time option is needed to hit the problem. check-in: f5580f08 user: drh tags: trunk
17:23
Modify the fts5 leaf page format to permit faster seek operations. This is a file-format change. Any existing databases can be upgraded by running the fts5 'rebuild' command. check-in: 0c0c4ae9 user: dan tags: trunk
17:20
Create separate "path" and "root" columns in the json_each() and json_tree() virtual tables. "Root" is the 2nd parameter and is fixed. "Path" varies as json_tree() walks the hierarchy. check-in: 127cce3e user: drh tags: trunk
16:39
Increment the fts5 version value to indicate that the on-disk format has changed. Closed-Leaf check-in: 99de5e36 user: dan tags: fts5-incompatible
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5Int.h.

   113    113   **   This exists in order to allow the fts5_index.c module to return a 
   114    114   **   decent error message if it encounters a file-format version it does
   115    115   **   not understand.
   116    116   **
   117    117   ** bColumnsize:
   118    118   **   True if the %_docsize table is created.
   119    119   **
          120  +** bPrefixIndex:
          121  +**   This is only used for debugging. If set to false, any prefix indexes
          122  +**   are ignored. This value is configured using:
          123  +**
          124  +**       INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex);
          125  +**
   120    126   */
   121    127   struct Fts5Config {
   122    128     sqlite3 *db;                    /* Database handle */
   123    129     char *zDb;                      /* Database holding FTS index (e.g. "main") */
   124    130     char *zName;                    /* Name of FTS index */
   125    131     int nCol;                       /* Number of columns */
   126    132     char **azCol;                   /* Column names */
................................................................................
   141    147     int nAutomerge;                 /* 'automerge' setting */
   142    148     int nCrisisMerge;               /* Maximum allowed segments per level */
   143    149     char *zRank;                    /* Name of rank function */
   144    150     char *zRankArgs;                /* Arguments to rank function */
   145    151   
   146    152     /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
   147    153     char **pzErrmsg;
          154  +
          155  +#ifdef SQLITE_DEBUG
          156  +  int bPrefixIndex;               /* True to use prefix-indexes */
          157  +#endif
   148    158   };
   149    159   
   150    160   /* Current expected value of %_config table 'version' field */
   151         -#define FTS5_CURRENT_VERSION 3
          161  +#define FTS5_CURRENT_VERSION 4
   152    162   
   153    163   #define FTS5_CONTENT_NORMAL   0
   154    164   #define FTS5_CONTENT_NONE     1
   155    165   #define FTS5_CONTENT_EXTERNAL 2
   156    166   
   157    167   
   158    168   

Changes to ext/fts5/fts5_buffer.c.

    12     12   */
    13     13   
    14     14   
    15     15   
    16     16   #include "fts5Int.h"
    17     17   
    18     18   int sqlite3Fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){
    19         -  /* A no-op if an error has already occurred */
    20         -  if( *pRc ) return 1;
    21     19   
    22     20     if( (pBuf->n + nByte) > pBuf->nSpace ){
    23     21       u8 *pNew;
    24     22       int nNew = pBuf->nSpace ? pBuf->nSpace*2 : 64;
           23  +
           24  +    /* A no-op if an error has already occurred */
           25  +    if( *pRc ) return 1;
           26  +
    25     27       while( nNew<(pBuf->n + nByte) ){
    26     28         nNew = nNew * 2;
    27     29       }
    28     30       pNew = sqlite3_realloc(pBuf->p, nNew);
    29     31       if( pNew==0 ){
    30     32         *pRc = SQLITE_NOMEM;
    31     33         return 1;

Changes to ext/fts5/fts5_config.c.

   476    476   
   477    477     nByte = nArg * (sizeof(char*) + sizeof(u8));
   478    478     pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
   479    479     pRet->abUnindexed = (u8*)&pRet->azCol[nArg];
   480    480     pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
   481    481     pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);
   482    482     pRet->bColumnsize = 1;
          483  +#ifdef SQLITE_DEBUG
          484  +  pRet->bPrefixIndex = 1;
          485  +#endif
   483    486     if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){
   484    487       *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName);
   485    488       rc = SQLITE_ERROR;
   486    489     }
   487    490   
   488    491     for(i=3; rc==SQLITE_OK && i<nArg; i++){
   489    492       const char *zOrig = azArg[i];

Changes to ext/fts5/fts5_index.c.

    83     83   **
    84     84   **   Then, for each level from 0 to nMax:
    85     85   **
    86     86   **     + number of input segments in ongoing merge.
    87     87   **     + total number of segments in level.
    88     88   **     + for each segment from oldest to newest:
    89     89   **         + segment id (always > 0)
    90         -**         + b-tree height (1 -> root is leaf, 2 -> root is parent of leaf etc.)
    91     90   **         + first leaf page number (often 1, always greater than 0)
    92     91   **         + final leaf page number
    93     92   **
    94     93   ** 2. The Averages Record:
    95     94   **
    96     95   **   A single record within the %_data table. The data is a list of varints.
    97     96   **   The first value is the number of rows in the index. Then, for each column
    98         -**   from left to right, the total number of tokens in the column for all 
           97  +**   from left to right, the total number of tokens in the column for all
    99     98   **   rows of the table.
   100     99   **
   101    100   ** 3. Segment leaves:
   102    101   **
   103         -**   TERM DOCLIST FORMAT:
          102  +**   TERM/DOCLIST FORMAT:
   104    103   **
   105    104   **     Most of each segment leaf is taken up by term/doclist data. The 
   106         -**     general format of the term/doclist data is:
          105  +**     general format of term/doclist, starting with the first term
          106  +**     on the leaf page, is:
   107    107   **
   108    108   **         varint : size of first term
   109    109   **         blob:    first term data
   110    110   **         doclist: first doclist
   111    111   **         zero-or-more {
   112    112   **           varint:  number of bytes in common with previous term
   113    113   **           varint:  number of bytes of new term data (nNew)
................................................................................
   119    119   **
   120    120   **         varint:  first rowid
   121    121   **         poslist: first poslist
   122    122   **         zero-or-more {
   123    123   **           varint:  rowid delta (always > 0)
   124    124   **           poslist: next poslist
   125    125   **         }
   126         -**         0x00 byte
   127    126   **
   128    127   **     poslist format:
   129    128   **
   130    129   **         varint: size of poslist in bytes multiplied by 2, not including
   131    130   **                 this field. Plus 1 if this entry carries the "delete" flag.
   132    131   **         collist: collist for column 0
   133    132   **         zero-or-more {
................................................................................
   139    138   **     collist format:
   140    139   **
   141    140   **         varint: first offset + 2
   142    141   **         zero-or-more {
   143    142   **           varint: offset delta + 2
   144    143   **         }
   145    144   **
   146         -**   PAGINATION
          145  +**   PAGE FORMAT
   147    146   **
   148         -**     The format described above is only accurate if the entire term/doclist
   149         -**     data fits on a single leaf page. If this is not the case, the format
   150         -**     is changed in two ways:
          147  +**     Each leaf page begins with a 4-byte header containing 2 16-bit 
          148  +**     unsigned integer fields in big-endian format. They are:
          149  +**
          150  +**       * The byte offset of the first rowid on the page, if it exists
          151  +**         and occurs before the first term (otherwise 0).
          152  +**
          153  +**       * The byte offset of the start of the page footer. If the page
          154  +**         footer is 0 bytes in size, then this field is the same as the
          155  +**         size of the leaf page in bytes.
          156  +**
          157  +**     The page footer consists of a single varint for each term located
          158  +**     on the page. Each varint is the byte offset of the current term
          159  +**     within the page, delta-compressed against the previous value. In
          160  +**     other words, the first varint in the footer is the byte offset of
          161  +**     the first term, the second is the byte offset of the second less that
          162  +**     of the first, and so on.
          163  +**
          164  +**     The term/doclist format described above is accurate if the entire
          165  +**     term/doclist data fits on a single leaf page. If this is not the case,
          166  +**     the format is changed in two ways:
   151    167   **
   152    168   **       + if the first rowid on a page occurs before the first term, it
   153    169   **         is stored as a literal value:
   154    170   **
   155    171   **             varint:  first rowid
   156    172   **
   157    173   **       + the first term on each page is stored in the same way as the
   158    174   **         very first term of the segment:
   159    175   **
   160    176   **             varint : size of first term
   161    177   **             blob:    first term data
   162    178   **
   163         -**     Each leaf page begins with:
   164         -**
   165         -**       + 2-byte unsigned containing offset to first rowid (or 0).
   166         -**       + 2-byte unsigned containing offset to first term (or 0).
   167         -**
   168         -**   Followed by term/doclist data.
   169         -**
   170         -** 4. Segment interior nodes:
   171         -**
   172         -**   The interior nodes turn the list of leaves into a b+tree. 
   173         -**
   174         -**   Each interior node begins with a varint - the page number of the left
   175         -**   most child node. Following this, for each leaf page except the first,
   176         -**   the interior nodes contain:
   177         -**
   178         -**     a) If the leaf page contains at least one term, then a term-prefix that
   179         -**        is greater than all previous terms, and less than or equal to the
   180         -**        first term on the leaf page.
   181         -**
   182         -**     b) If the leaf page no terms, a record indicating how many consecutive
   183         -**        leaves contain no terms, and whether or not there is an associated
   184         -**        by-rowid index record.
   185         -**
   186         -**   By definition, there is never more than one type (b) record in a row.
   187         -**   Type (b) records only ever appear on height=1 pages - immediate parents
   188         -**   of leaves. Only type (a) records are pushed to higher levels.
   189         -**
   190         -**   Term format:
   191         -**
   192         -**     * Number of bytes in common with previous term plus 2, as a varint.
   193         -**     * Number of bytes of new term data, as a varint.
   194         -**     * new term data.
   195         -**
   196         -**   No-term format:
   197         -**
   198         -**     * either an 0x00 or 0x01 byte. If the value 0x01 is used, then there 
   199         -**       is an associated index-by-rowid record.
   200         -**     * the number of zero-term leaves as a varint.
   201         -**
   202    179   ** 5. Segment doclist indexes:
   203    180   **
   204    181   **   Doclist indexes are themselves b-trees, however they usually consist of
   205    182   **   a single leaf record only. The format of each doclist index leaf page 
   206    183   **   is:
   207    184   **
   208    185   **     * Flags byte. Bits are:
................................................................................
   233    210   /*
   234    211   ** Rowids for the averages and structure records in the %_data table.
   235    212   */
   236    213   #define FTS5_AVERAGES_ROWID     1    /* Rowid used for the averages record */
   237    214   #define FTS5_STRUCTURE_ROWID   10    /* The structure record */
   238    215   
   239    216   /*
   240         -** Macros determining the rowids used by segment nodes. All nodes in all
   241         -** segments for all indexes (the regular FTS index and any prefix indexes)
   242         -** are stored in the %_data table with large positive rowids.
          217  +** Macros determining the rowids used by segment leaves and dlidx leaves
          218  +** and nodes. All nodes and leaves are stored in the %_data table with large
          219  +** positive rowids.
   243    220   **
   244         -** The %_data table may contain up to (1<<FTS5_SEGMENT_INDEX_BITS) 
   245         -** indexes - one regular term index and zero or more prefix indexes.
          221  +** Each segment has a unique non-zero 16-bit id.
   246    222   **
   247         -** Each segment in an index has a unique id greater than zero.
   248         -**
   249         -** Each node in a segment b-tree is assigned a "page number" that is unique
   250         -** within nodes of its height within the segment (leaf nodes have a height 
   251         -** of 0, parents 1, etc.). Page numbers are allocated sequentially so that
   252         -** a nodes page number is always one more than its left sibling.
   253         -**
   254         -** The rowid for a node is then found using the FTS5_SEGMENT_ROWID() macro
   255         -** below. The FTS5_SEGMENT_*_BITS macros define the number of bits used
   256         -** to encode the three FTS5_SEGMENT_ROWID() arguments. This module returns
   257         -** SQLITE_FULL and fails the current operation if they ever prove too small.
          223  +** The rowid for each segment leaf is found by passing the segment id and 
          224  +** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
          225  +** sequentially starting from 1.
   258    226   */
   259    227   #define FTS5_DATA_ID_B     16     /* Max seg id number 65535 */
   260    228   #define FTS5_DATA_DLI_B     1     /* Doclist-index flag (1 bit) */
   261         -#define FTS5_DATA_HEIGHT_B  5     /* Max b-tree height of 32 */
          229  +#define FTS5_DATA_HEIGHT_B  5     /* Max dlidx tree height of 32 */
   262    230   #define FTS5_DATA_PAGE_B   31     /* Max page number of 2147483647 */
   263    231   
   264    232   #define fts5_dri(segid, dlidx, height, pgno) (                                 \
   265    233    ((i64)(segid)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) +    \
   266    234    ((i64)(dlidx)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
   267    235    ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                       \
   268    236    ((i64)(pgno))                                                                 \
   269    237   )
   270    238   
   271         -#define FTS5_SEGMENT_ROWID(segid, height, pgno) fts5_dri(segid, 0, height, pgno)
   272         -#define FTS5_DLIDX_ROWID(segid, height, pgno)   fts5_dri(segid, 1, height, pgno)
          239  +#define FTS5_SEGMENT_ROWID(segid, pgno)       fts5_dri(segid, 0, 0, pgno)
          240  +#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
   273    241   
   274    242   /*
   275    243   ** Maximum segments permitted in a single index 
   276    244   */
   277    245   #define FTS5_MAX_SEGMENT 2000
   278    246   
   279    247   #ifdef SQLITE_DEBUG
................................................................................
   299    267   typedef struct Fts5SegWriter Fts5SegWriter;
   300    268   typedef struct Fts5Structure Fts5Structure;
   301    269   typedef struct Fts5StructureLevel Fts5StructureLevel;
   302    270   typedef struct Fts5StructureSegment Fts5StructureSegment;
   303    271   
   304    272   struct Fts5Data {
   305    273     u8 *p;                          /* Pointer to buffer containing record */
   306         -  int n;                          /* Size of record in bytes */
          274  +  int nn;                         /* Size of record in bytes */
          275  +  int szLeaf;                     /* Size of leaf without page-index */
   307    276   };
   308    277   
   309    278   /*
   310    279   ** One object per %_data table.
   311    280   */
   312    281   struct Fts5Index {
   313    282     Fts5Config *pConfig;            /* Virtual table configuration */
................................................................................
   351    320   /*
   352    321   ** The contents of the "structure" record for each index are represented
   353    322   ** using an Fts5Structure record in memory. Which uses instances of the 
   354    323   ** other Fts5StructureXXX types as components.
   355    324   */
   356    325   struct Fts5StructureSegment {
   357    326     int iSegid;                     /* Segment id */
   358         -  int nHeight;                    /* Height of segment b-tree */
   359    327     int pgnoFirst;                  /* First leaf page number in segment */
   360    328     int pgnoLast;                   /* Last leaf page number in segment */
   361    329   };
   362    330   struct Fts5StructureLevel {
   363    331     int nMerge;                     /* Number of segments in incr-merge */
   364    332     int nSeg;                       /* Total number of segments on level */
   365    333     Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest. */
................................................................................
   373    341   };
   374    342   
   375    343   /*
   376    344   ** An object of type Fts5SegWriter is used to write to segments.
   377    345   */
   378    346   struct Fts5PageWriter {
   379    347     int pgno;                       /* Page number for this page */
   380         -  Fts5Buffer buf;                 /* Buffer containing page data */
          348  +  int iPrevPgidx;                 /* Previous value written into pgidx */
          349  +  Fts5Buffer buf;                 /* Buffer containing leaf data */
          350  +  Fts5Buffer pgidx;               /* Buffer containing page-index */
   381    351     Fts5Buffer term;                /* Buffer containing previous term on page */
   382    352   };
   383    353   struct Fts5DlidxWriter {
   384    354     int pgno;                       /* Page number for this page */
   385    355     int bPrevValid;                 /* True if iPrev is valid */
   386    356     i64 iPrev;                      /* Previous rowid value written to page */
   387    357     Fts5Buffer buf;                 /* Buffer containing page data */
................................................................................
   388    358   };
   389    359   struct Fts5SegWriter {
   390    360     int iSegid;                     /* Segid to write to */
   391    361     Fts5PageWriter writer;          /* PageWriter object */
   392    362     i64 iPrevRowid;                 /* Previous rowid written to current leaf */
   393    363     u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
   394    364     u8 bFirstRowidInPage;           /* True if next rowid is first in page */
          365  +  /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
   395    366     u8 bFirstTermInPage;            /* True if next term will be first in leaf */
   396    367     int nLeafWritten;               /* Number of leaf pages written */
   397    368     int nEmpty;                     /* Number of contiguous term-less nodes */
   398    369   
   399    370     int nDlidx;                     /* Allocated size of aDlidx[] array */
   400    371     Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */
   401    372   
................................................................................
   468    439   **
   469    440   ** iRowidOffset/nRowidOffset/aRowidOffset:
   470    441   **     These are used if the FTS5_SEGITER_REVERSE flag is set.
   471    442   **
   472    443   **     For each rowid on the page corresponding to the current term, the
   473    444   **     corresponding aRowidOffset[] entry is set to the byte offset of the
   474    445   **     start of the "position-list-size" field within the page.
          446  +**
          447  +** iTermIdx:
          448  +**     Index of current term on iTermLeafPgno.
   475    449   */
   476    450   struct Fts5SegIter {
   477    451     Fts5StructureSegment *pSeg;     /* Segment to iterate through */
   478    452     int flags;                      /* Mask of configuration flags */
   479    453     int iLeafPgno;                  /* Current leaf page number */
   480    454     Fts5Data *pLeaf;                /* Current leaf data */
   481    455     Fts5Data *pNextLeaf;            /* Leaf page (iLeafPgno+1) */
   482    456     int iLeafOffset;                /* Byte offset within current leaf */
   483    457   
   484    458     /* The page and offset from which the current term was read. The offset 
   485    459     ** is the offset of the first rowid in the current doclist.  */
   486    460     int iTermLeafPgno;
   487    461     int iTermLeafOffset;
          462  +
          463  +  int iPgidxOff;                  /* Next offset in pgidx */
          464  +  int iEndofDoclist;
   488    465   
   489    466     /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
   490    467     int iRowidOffset;               /* Current entry in aRowidOffset[] */
   491    468     int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
   492    469     int *aRowidOffset;              /* Array of offset to rowid fields */
   493    470   
   494    471     Fts5DlidxIter *pDlidx;          /* If there is a doclist-index */
................................................................................
   495    472   
   496    473     /* Variables populated based on current entry. */
   497    474     Fts5Buffer term;                /* Current term */
   498    475     i64 iRowid;                     /* Current rowid */
   499    476     int nPos;                       /* Number of bytes in current position list */
   500    477     int bDel;                       /* True if the delete flag is set */
   501    478   };
          479  +
          480  +/*
          481  +** Argument is a pointer to an Fts5Data structure that contains a 
          482  +** leaf page.
          483  +*/
          484  +#define ASSERT_SZLEAF_OK(x) assert( \
          485  +    (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
          486  +)
   502    487   
   503    488   #define FTS5_SEGITER_ONETERM 0x01
   504    489   #define FTS5_SEGITER_REVERSE 0x02
   505    490   
   506    491   
          492  +/* 
          493  +** Argument is a pointer to an Fts5Data structure that contains a leaf
          494  +** page. This macro evaluates to true if the leaf contains no terms, or
          495  +** false if it contains at least one term.
          496  +*/
          497  +#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)
          498  +
          499  +#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))
          500  +
          501  +#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
          502  +
   507    503   /*
   508    504   ** poslist:
   509    505   **   Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
   510    506   **   There is no way to tell if this is populated or not.
   511    507   */
   512    508   struct Fts5IndexIter {
   513    509     Fts5Index *pIndex;              /* Index that owns this iterator */
................................................................................
   614    610   ){
   615    611     int nCmp = MIN(nLeft, nRight);
   616    612     int res = memcmp(pLeft, pRight, nCmp);
   617    613     return (res==0 ? (nLeft - nRight) : res);
   618    614   }
   619    615   #endif
   620    616   
          617  +static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
          618  +  int ret;
          619  +  fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
          620  +  return ret;
          621  +}
   621    622   
   622    623   /*
   623    624   ** Close the read-only blob handle, if it is open.
   624    625   */
   625    626   static void fts5CloseReader(Fts5Index *p){
   626    627     if( p->pReader ){
   627    628       sqlite3_blob *pReader = p->pReader;
................................................................................
   675    676   
   676    677       if( rc==SQLITE_OK ){
   677    678         u8 *aOut = 0;               /* Read blob data into this buffer */
   678    679         int nByte = sqlite3_blob_bytes(p->pReader);
   679    680         int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
   680    681         pRet = (Fts5Data*)sqlite3_malloc(nAlloc);
   681    682         if( pRet ){
   682         -        pRet->n = nByte;
          683  +        pRet->nn = nByte;
   683    684           aOut = pRet->p = (u8*)&pRet[1];
   684    685         }else{
   685    686           rc = SQLITE_NOMEM;
   686    687         }
   687    688   
   688    689         if( rc==SQLITE_OK ){
   689    690           rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
   690    691         }
   691    692         if( rc!=SQLITE_OK ){
   692    693           sqlite3_free(pRet);
   693    694           pRet = 0;
          695  +      }else{
          696  +        /* TODO1: Fix this */
          697  +        pRet->szLeaf = fts5GetU16(&pRet->p[2]);
   694    698         }
   695    699       }
   696    700       p->rc = rc;
   697    701       p->nRead++;
   698    702     }
   699    703   
   700    704     assert( (pRet==0)==(p->rc!=SQLITE_OK) );
................................................................................
   781    785     p->rc = sqlite3_reset(p->pDeleter);
   782    786   }
   783    787   
   784    788   /*
   785    789   ** Remove all records associated with segment iSegid.
   786    790   */
   787    791   static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
   788         -  i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0, 0);
   789         -  i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0, 0)-1;
          792  +  i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
          793  +  i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
   790    794     fts5DataDelete(p, iFirst, iLast);
   791    795     if( p->pIdxDeleter==0 ){
   792    796       Fts5Config *pConfig = p->pConfig;
   793    797       fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
   794    798             "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
   795    799             pConfig->zDb, pConfig->zName
   796    800       ));
................................................................................
   879    883             nTotal * sizeof(Fts5StructureSegment)
   880    884         );
   881    885   
   882    886         if( rc==SQLITE_OK ){
   883    887           pLvl->nSeg = nTotal;
   884    888           for(iSeg=0; iSeg<nTotal; iSeg++){
   885    889             i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid);
   886         -          i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].nHeight);
   887    890             i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst);
   888    891             i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast);
   889    892           }
   890    893         }else{
   891    894           fts5StructureRelease(pRet);
   892    895           pRet = 0;
   893    896         }
................................................................................
   970    973     Fts5Structure *pRet = 0;        /* Object to return */
   971    974     int iCookie;                    /* Configuration cookie */
   972    975     Fts5Data *pData;
   973    976     Fts5Buffer buf = {0, 0, 0};
   974    977   
   975    978     pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
   976    979     if( p->rc ) return 0;
   977         -  memset(&pData->p[pData->n], 0, FTS5_DATA_PADDING);
   978         -  p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet);
          980  +  /* TODO: Do we need this if the leaf-index is appended? Probably... */
          981  +  memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
          982  +  p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
   979    983     if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
   980    984       p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
   981    985     }
   982    986   
   983    987     fts5DataRelease(pData);
   984    988     if( p->rc!=SQLITE_OK ){
   985    989       fts5StructureRelease(pRet);
................................................................................
  1035   1039         Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
  1036   1040         fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
  1037   1041         fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
  1038   1042         assert( pLvl->nMerge<=pLvl->nSeg );
  1039   1043   
  1040   1044         for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
  1041   1045           fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
  1042         -        fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].nHeight);
  1043   1046           fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
  1044   1047           fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
  1045   1048         }
  1046   1049       }
  1047   1050   
  1048   1051       fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
  1049   1052       fts5BufferFree(&buf);
................................................................................
  1124   1127   ){
  1125   1128     if( p->rc==SQLITE_OK ){
  1126   1129       int iTst;
  1127   1130       int iPromote = -1;
  1128   1131       int szPromote = 0;            /* Promote anything this size or smaller */
  1129   1132       Fts5StructureSegment *pSeg;   /* Segment just written */
  1130   1133       int szSeg;                    /* Size of segment just written */
         1134  +    int nSeg = pStruct->aLevel[iLvl].nSeg;
  1131   1135   
  1132         -
         1136  +    if( nSeg==0 ) return;
  1133   1137       pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
  1134   1138       szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);
  1135   1139   
  1136   1140       /* Check for condition (a) */
  1137   1141       for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
  1138   1142       if( iTst>=0 ){
  1139   1143         int i;
................................................................................
  1174   1178       assert( pLvl->bEof==0 );
  1175   1179       pLvl->iOff = 1;
  1176   1180       pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
  1177   1181       pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
  1178   1182       pLvl->iFirstOff = pLvl->iOff;
  1179   1183     }else{
  1180   1184       int iOff;
  1181         -    for(iOff=pLvl->iOff; iOff<pData->n; iOff++){
         1185  +    for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
  1182   1186         if( pData->p[iOff] ) break; 
  1183   1187       }
  1184   1188   
  1185         -    if( iOff<pData->n ){
         1189  +    if( iOff<pData->nn ){
  1186   1190         i64 iVal;
  1187   1191         pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
  1188   1192         iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
  1189   1193         pLvl->iRowid += iVal;
  1190   1194         pLvl->iOff = iOff;
  1191   1195       }else{
  1192   1196         pLvl->bEof = 1;
................................................................................
  1421   1425   /*
  1422   1426   ** Load the next leaf page into the segment iterator.
  1423   1427   */
  1424   1428   static void fts5SegIterNextPage(
  1425   1429     Fts5Index *p,                   /* FTS5 backend object */
  1426   1430     Fts5SegIter *pIter              /* Iterator to advance to next page */
  1427   1431   ){
         1432  +  Fts5Data *pLeaf;
  1428   1433     Fts5StructureSegment *pSeg = pIter->pSeg;
  1429   1434     fts5DataRelease(pIter->pLeaf);
  1430   1435     pIter->iLeafPgno++;
  1431   1436     if( pIter->pNextLeaf ){
  1432   1437       assert( pIter->iLeafPgno<=pSeg->pgnoLast );
  1433   1438       pIter->pLeaf = pIter->pNextLeaf;
  1434   1439       pIter->pNextLeaf = 0;
  1435   1440     }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
  1436   1441       pIter->pLeaf = fts5DataRead(p, 
  1437         -        FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pIter->iLeafPgno)
         1442  +        FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
  1438   1443       );
  1439   1444     }else{
  1440   1445       pIter->pLeaf = 0;
         1446  +  }
         1447  +  pLeaf = pIter->pLeaf;
         1448  +
         1449  +  if( pLeaf ){
         1450  +    pIter->iPgidxOff = pLeaf->szLeaf;
         1451  +    if( fts5LeafIsTermless(pLeaf) ){
         1452  +      pIter->iEndofDoclist = pLeaf->nn+1;
         1453  +    }else{
         1454  +      pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
         1455  +          pIter->iEndofDoclist
         1456  +      );
         1457  +    }
  1441   1458     }
  1442   1459   }
  1443   1460   
  1444   1461   /*
  1445   1462   ** Argument p points to a buffer containing a varint to be interpreted as a
  1446   1463   ** position list size field. Read the varint and return the number of bytes
  1447   1464   ** read. Before returning, set *pnSz to the number of bytes in the position
................................................................................
  1466   1483   **
  1467   1484   ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the 
  1468   1485   ** position list content (if any).
  1469   1486   */
  1470   1487   static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
  1471   1488     if( p->rc==SQLITE_OK ){
  1472   1489       int iOff = pIter->iLeafOffset;  /* Offset to read at */
  1473         -    if( iOff>=pIter->pLeaf->n ){
         1490  +    ASSERT_SZLEAF_OK(pIter->pLeaf);
         1491  +    if( iOff>=pIter->pLeaf->szLeaf ){
  1474   1492         p->rc = FTS5_CORRUPT;
  1475   1493       }else{
  1476   1494         const u8 *a = &pIter->pLeaf->p[iOff];
  1477   1495         pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel);
  1478   1496       }
  1479   1497     }
  1480   1498   }
  1481   1499   
  1482   1500   static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  1483   1501     u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  1484   1502     int iOff = pIter->iLeafOffset;
  1485   1503   
  1486         -  if( iOff>=pIter->pLeaf->n ){
         1504  +  ASSERT_SZLEAF_OK(pIter->pLeaf);
         1505  +  if( iOff>=pIter->pLeaf->szLeaf ){
  1487   1506       fts5SegIterNextPage(p, pIter);
  1488   1507       if( pIter->pLeaf==0 ){
  1489   1508         if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
  1490   1509         return;
  1491   1510       }
  1492   1511       iOff = 4;
  1493   1512       a = pIter->pLeaf->p;
................................................................................
  1519   1538     iOff += fts5GetVarint32(&a[iOff], nNew);
  1520   1539     pIter->term.n = nKeep;
  1521   1540     fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  1522   1541     iOff += nNew;
  1523   1542     pIter->iTermLeafOffset = iOff;
  1524   1543     pIter->iTermLeafPgno = pIter->iLeafPgno;
  1525   1544     pIter->iLeafOffset = iOff;
         1545  +
         1546  +  if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
         1547  +    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
         1548  +  }else{
         1549  +    int nExtra;
         1550  +    pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
         1551  +    pIter->iEndofDoclist += nExtra;
         1552  +  }
  1526   1553   
  1527   1554     fts5SegIterLoadRowid(p, pIter);
  1528   1555   }
  1529   1556   
  1530   1557   /*
  1531   1558   ** Initialize the iterator object pIter to iterate through the entries in
  1532   1559   ** segment pSeg. The iterator is left pointing to the first entry when 
................................................................................
  1554   1581       memset(pIter, 0, sizeof(*pIter));
  1555   1582       pIter->pSeg = pSeg;
  1556   1583       pIter->iLeafPgno = pSeg->pgnoFirst-1;
  1557   1584       fts5SegIterNextPage(p, pIter);
  1558   1585     }
  1559   1586   
  1560   1587     if( p->rc==SQLITE_OK ){
  1561         -    u8 *a = pIter->pLeaf->p;
  1562         -    pIter->iLeafOffset = fts5GetU16(&a[2]);
         1588  +    pIter->iLeafOffset = 4;
         1589  +    assert_nc( pIter->pLeaf->nn>4 );
         1590  +    assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
         1591  +    pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
  1563   1592       fts5SegIterLoadTerm(p, pIter, 0);
  1564   1593       fts5SegIterLoadNPos(p, pIter);
  1565   1594     }
  1566   1595   }
  1567   1596   
  1568   1597   /*
  1569   1598   ** This function is only ever called on iterators created by calls to
................................................................................
  1577   1606   ** This function advances the iterator so that it points to the last 
  1578   1607   ** relevant rowid on the page and, if necessary, initializes the 
  1579   1608   ** aRowidOffset[] and iRowidOffset variables. At this point the iterator
  1580   1609   ** is in its regular state - Fts5SegIter.iLeafOffset points to the first
  1581   1610   ** byte of the position list content associated with said rowid.
  1582   1611   */
  1583   1612   static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
  1584         -  int n = pIter->pLeaf->n;
         1613  +  int n = pIter->pLeaf->szLeaf;
  1585   1614     int i = pIter->iLeafOffset;
  1586   1615     u8 *a = pIter->pLeaf->p;
  1587   1616     int iRowidOffset = 0;
  1588   1617   
         1618  +  if( n>pIter->iEndofDoclist ){
         1619  +    n = pIter->iEndofDoclist;
         1620  +  }
         1621  +
         1622  +  ASSERT_SZLEAF_OK(pIter->pLeaf);
  1589   1623     while( 1 ){
  1590   1624       i64 iDelta = 0;
  1591   1625       int nPos;
  1592   1626       int bDummy;
  1593   1627   
  1594   1628       i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
  1595   1629       i += nPos;
  1596   1630       if( i>=n ) break;
  1597   1631       i += fts5GetVarint(&a[i], (u64*)&iDelta);
  1598         -    if( iDelta==0 ) break;
  1599   1632       pIter->iRowid += iDelta;
  1600   1633   
  1601   1634       if( iRowidOffset>=pIter->nRowidOffset ){
  1602   1635         int nNew = pIter->nRowidOffset + 8;
  1603   1636         int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int));
  1604   1637         if( aNew==0 ){
  1605   1638           p->rc = SQLITE_NOMEM;
................................................................................
  1625   1658   
  1626   1659     fts5DataRelease(pIter->pLeaf);
  1627   1660     pIter->pLeaf = 0;
  1628   1661     while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
  1629   1662       Fts5Data *pNew;
  1630   1663       pIter->iLeafPgno--;
  1631   1664       pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
  1632         -          pIter->pSeg->iSegid, 0, pIter->iLeafPgno
         1665  +          pIter->pSeg->iSegid, pIter->iLeafPgno
  1633   1666       ));
  1634   1667       if( pNew ){
  1635   1668         if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
  1636         -        if( pIter->iTermLeafOffset<pNew->n ){
         1669  +        if( pIter->iTermLeafOffset<pNew->szLeaf ){
  1637   1670             pIter->pLeaf = pNew;
  1638   1671             pIter->iLeafOffset = pIter->iTermLeafOffset;
  1639   1672           }
  1640   1673         }else{
  1641         -        int iRowidOff, dummy;
  1642         -        fts5LeafHeader(pNew, &iRowidOff, &dummy);
         1674  +        int iRowidOff;
         1675  +        iRowidOff = fts5LeafFirstRowidOff(pNew);
  1643   1676           if( iRowidOff ){
  1644   1677             pIter->pLeaf = pNew;
  1645   1678             pIter->iLeafOffset = iRowidOff;
  1646   1679           }
  1647   1680         }
  1648   1681   
  1649   1682         if( pIter->pLeaf ){
................................................................................
  1653   1686         }else{
  1654   1687           fts5DataRelease(pNew);
  1655   1688         }
  1656   1689       }
  1657   1690     }
  1658   1691   
  1659   1692     if( pIter->pLeaf ){
         1693  +    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
  1660   1694       fts5SegIterReverseInitPage(p, pIter);
  1661   1695     }
  1662   1696   }
  1663   1697   
  1664   1698   /*
  1665   1699   ** Return true if the iterator passed as the second argument currently
  1666   1700   ** points to a delete marker. A delete marker is an entry with a 0 byte
................................................................................
  1708   1742         Fts5Data *pLeaf = pIter->pLeaf;
  1709   1743         int iOff;
  1710   1744         int bNewTerm = 0;
  1711   1745         int nKeep = 0;
  1712   1746   
  1713   1747         /* Search for the end of the position list within the current page. */
  1714   1748         u8 *a = pLeaf->p;
  1715         -      int n = pLeaf->n;
         1749  +      int n = pLeaf->szLeaf;
  1716   1750   
         1751  +      ASSERT_SZLEAF_OK(pLeaf);
  1717   1752         iOff = pIter->iLeafOffset + pIter->nPos;
  1718   1753   
  1719   1754         if( iOff<n ){
  1720         -        /* The next entry is on the current page */
  1721         -        u64 iDelta;
  1722         -        iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
  1723         -        pIter->iLeafOffset = iOff;
  1724         -        if( iDelta==0 ){
         1755  +        /* The next entry is on the current page. */
         1756  +        assert_nc( iOff<=pIter->iEndofDoclist );
         1757  +        if( iOff>=pIter->iEndofDoclist ){
  1725   1758             bNewTerm = 1;
  1726         -          if( iOff>=n ){
  1727         -            fts5SegIterNextPage(p, pIter);
  1728         -            pIter->iLeafOffset = 4;
  1729         -          }else if( iOff!=fts5GetU16(&a[2]) ){
  1730         -            pIter->iLeafOffset += fts5GetVarint32(&a[iOff], nKeep);
         1759  +          if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
         1760  +            iOff += fts5GetVarint32(&a[iOff], nKeep);
  1731   1761             }
  1732   1762           }else{
         1763  +          u64 iDelta;
         1764  +          iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
  1733   1765             pIter->iRowid += iDelta;
         1766  +          assert_nc( iDelta>0 );
  1734   1767           }
         1768  +        pIter->iLeafOffset = iOff;
         1769  +
  1735   1770         }else if( pIter->pSeg==0 ){
  1736   1771           const u8 *pList = 0;
  1737   1772           const char *zTerm = 0;
  1738   1773           int nList = 0;
  1739   1774           if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
  1740   1775             sqlite3Fts5HashScanNext(p->pHash);
  1741   1776             sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
  1742   1777           }
  1743   1778           if( pList==0 ){
  1744   1779             fts5DataRelease(pIter->pLeaf);
  1745   1780             pIter->pLeaf = 0;
  1746   1781           }else{
  1747   1782             pIter->pLeaf->p = (u8*)pList;
  1748         -          pIter->pLeaf->n = nList;
         1783  +          pIter->pLeaf->nn = nList;
         1784  +          pIter->pLeaf->szLeaf = nList;
         1785  +          pIter->iEndofDoclist = nList+1;
  1749   1786             sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm);
  1750   1787             pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
  1751   1788           }
  1752   1789         }else{
  1753   1790           iOff = 0;
  1754   1791           /* Next entry is not on the current page */
  1755   1792           while( iOff==0 ){
  1756   1793             fts5SegIterNextPage(p, pIter);
  1757   1794             pLeaf = pIter->pLeaf;
  1758   1795             if( pLeaf==0 ) break;
  1759         -          if( (iOff = fts5GetU16(&pLeaf->p[0])) && iOff<pLeaf->n ){
         1796  +          ASSERT_SZLEAF_OK(pLeaf);
         1797  +          if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
  1760   1798               iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
  1761   1799               pIter->iLeafOffset = iOff;
         1800  +
         1801  +            if( pLeaf->nn>pLeaf->szLeaf ){
         1802  +              pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
         1803  +                  &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
         1804  +              );
         1805  +            }
         1806  +
  1762   1807             }
  1763         -          else if( (iOff = fts5GetU16(&pLeaf->p[2])) ){
         1808  +          else if( pLeaf->nn>pLeaf->szLeaf ){
         1809  +            pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
         1810  +                &pLeaf->p[pLeaf->szLeaf], iOff
         1811  +            );
  1764   1812               pIter->iLeafOffset = iOff;
         1813  +            pIter->iEndofDoclist = iOff;
  1765   1814               bNewTerm = 1;
  1766   1815             }
  1767         -          if( iOff>=pLeaf->n ){
         1816  +          if( iOff>=pLeaf->szLeaf ){
  1768   1817               p->rc = FTS5_CORRUPT;
  1769   1818               return;
  1770   1819             }
  1771   1820           }
  1772   1821         }
  1773   1822   
  1774   1823         /* Check if the iterator is now at EOF. If so, return early. */
  1775   1824         if( pIter->pLeaf ){
  1776   1825           if( bNewTerm ){
  1777   1826             if( pIter->flags & FTS5_SEGITER_ONETERM ){
  1778   1827               fts5DataRelease(pIter->pLeaf);
  1779   1828               pIter->pLeaf = 0;
  1780   1829             }else{
         1830  +            int nExtra;
  1781   1831               fts5SegIterLoadTerm(p, pIter, nKeep);
  1782   1832               fts5SegIterLoadNPos(p, pIter);
  1783   1833               if( pbNewTerm ) *pbNewTerm = 1;
  1784   1834             }
  1785   1835           }else{
  1786   1836             fts5SegIterLoadNPos(p, pIter);
  1787   1837           }
................................................................................
  1801   1851     Fts5DlidxIter *pDlidx = pIter->pDlidx;
  1802   1852     Fts5Data *pLast = 0;
  1803   1853     int pgnoLast = 0;
  1804   1854   
  1805   1855     if( pDlidx ){
  1806   1856       int iSegid = pIter->pSeg->iSegid;
  1807   1857       pgnoLast = fts5DlidxIterPgno(pDlidx);
  1808         -    pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast));
         1858  +    pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
  1809   1859     }else{
  1810   1860       int iOff;                               /* Byte offset within pLeaf */
  1811   1861       Fts5Data *pLeaf = pIter->pLeaf;         /* Current leaf data */
  1812   1862   
  1813   1863       /* Currently, Fts5SegIter.iLeafOffset points to the first 
  1814   1864       ** byte of position-list content for the current rowid. Back it up
  1815   1865       ** so that it points to the start of the position-list size field. */
  1816   1866       pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel);
  1817         -    iOff = pIter->iLeafOffset;
  1818         -    assert( iOff>=4 );
  1819         -
  1820         -    /* Search for a new term within the current leaf. If one can be found,
  1821         -    ** then this page contains the largest rowid for the current term. */
  1822         -    while( iOff<pLeaf->n ){
  1823         -      int nPos;
  1824         -      i64 iDelta;
  1825         -      int bDummy;
  1826         -
  1827         -      /* Read the position-list size field */
  1828         -      iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy);
  1829         -      iOff += nPos;
  1830         -      if( iOff>=pLeaf->n ) break;
  1831         -
  1832         -      /* Rowid delta. Or, if 0x00, the end of doclist marker. */
  1833         -      nPos = fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta);
  1834         -      if( iDelta==0 ) break;
  1835         -      iOff += nPos;
  1836         -    }
  1837   1867   
  1838   1868       /* If this condition is true then the largest rowid for the current
  1839   1869       ** term may not be stored on the current page. So search forward to
  1840   1870       ** see where said rowid really is.  */
  1841         -    if( iOff>=pLeaf->n ){
         1871  +    if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
  1842   1872         int pgno;
  1843   1873         Fts5StructureSegment *pSeg = pIter->pSeg;
  1844   1874   
  1845   1875         /* The last rowid in the doclist may not be on the current page. Search
  1846   1876         ** forward to find the page containing the last rowid.  */
  1847   1877         for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
  1848         -        i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pgno);
         1878  +        i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
  1849   1879           Fts5Data *pNew = fts5DataRead(p, iAbs);
  1850   1880           if( pNew ){
  1851         -          int iRowid, iTerm;
  1852         -          fts5LeafHeader(pNew, &iRowid, &iTerm);
         1881  +          int iRowid, bTermless;
         1882  +          iRowid = fts5LeafFirstRowidOff(pNew);
         1883  +          bTermless = fts5LeafIsTermless(pNew);
  1853   1884             if( iRowid ){
  1854   1885               SWAPVAL(Fts5Data*, pNew, pLast);
  1855   1886               pgnoLast = pgno;
  1856   1887             }
  1857   1888             fts5DataRelease(pNew);
  1858         -          if( iTerm ) break;
         1889  +          if( bTermless==0 ) break;
  1859   1890           }
  1860   1891         }
  1861   1892       }
  1862   1893     }
  1863   1894   
  1864   1895     /* If pLast is NULL at this point, then the last rowid for this doclist
  1865   1896     ** lies on the page currently indicated by the iterator. In this case 
................................................................................
  1867   1898     ** field associated with the first relevant rowid on the page.
  1868   1899     **
  1869   1900     ** Or, if pLast is non-NULL, then it is the page that contains the last
  1870   1901     ** rowid. In this case configure the iterator so that it points to the
  1871   1902     ** first rowid on this page.
  1872   1903     */
  1873   1904     if( pLast ){
  1874         -    int dummy;
  1875   1905       int iOff;
  1876   1906       fts5DataRelease(pIter->pLeaf);
  1877   1907       pIter->pLeaf = pLast;
  1878   1908       pIter->iLeafPgno = pgnoLast;
  1879         -    fts5LeafHeader(pLast, &iOff, &dummy);
         1909  +    iOff = fts5LeafFirstRowidOff(pLast);
  1880   1910       iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
  1881   1911       pIter->iLeafOffset = iOff;
         1912  +
         1913  +    if( fts5LeafIsTermless(pLast) ){
         1914  +      pIter->iEndofDoclist = pLast->nn+1;
         1915  +    }else{
         1916  +      pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
         1917  +    }
         1918  +
  1882   1919     }
  1883   1920   
  1884   1921     fts5SegIterReverseInitPage(p, pIter);
  1885   1922   }
  1886   1923   
  1887   1924   /*
  1888   1925   ** Iterator pIter currently points to the first rowid of a doclist.
................................................................................
  1897   1934   
  1898   1935     assert( pIter->flags & FTS5_SEGITER_ONETERM );
  1899   1936     assert( pIter->pDlidx==0 );
  1900   1937   
  1901   1938     /* Check if the current doclist ends on this page. If it does, return
  1902   1939     ** early without loading the doclist-index (as it belongs to a different
  1903   1940     ** term). */
  1904         -  if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
  1905         -    int iOff = pIter->iLeafOffset + pIter->nPos;
  1906         -    while( iOff<pLeaf->n ){
  1907         -      int bDummy;
  1908         -      int nPos;
  1909         -      i64 iDelta;
  1910         -
  1911         -      /* iOff is currently the offset of the start of position list data */
  1912         -      iOff += fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta);
  1913         -      if( iDelta==0 ) return;
  1914         -      assert_nc( iOff<pLeaf->n );
  1915         -      iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy);
  1916         -      iOff += nPos;
  1917         -    }
         1941  +  if( pIter->iTermLeafPgno==pIter->iLeafPgno 
         1942  +   && pIter->iEndofDoclist<pLeaf->szLeaf 
         1943  +  ){
         1944  +    return;
  1918   1945     }
  1919   1946   
  1920   1947     pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
  1921   1948   }
  1922   1949   
  1923   1950   #define fts5IndexGetVarint32(a, iOff, nVal) {     \
  1924         -  nVal = a[iOff++];                               \
         1951  +  nVal = (a)[iOff++];                             \
  1925   1952     if( nVal & 0x80 ){                              \
  1926   1953       iOff--;                                       \
  1927         -    iOff += fts5GetVarint32(&a[iOff], nVal);      \
         1954  +    iOff += fts5GetVarint32(&(a)[iOff], nVal);    \
  1928   1955     }                                               \
  1929   1956   }
  1930   1957   
  1931   1958   #define fts5IndexSkipVarint(a, iOff) {            \
  1932   1959     int iEnd = iOff+9;                              \
  1933   1960     while( (a[iOff++] & 0x80) && iOff<iEnd );       \
  1934   1961   }
................................................................................
  1951   1978     Fts5Index *p,                   /* Leave any error code here */
  1952   1979     int bGe,                        /* True for a >= search */
  1953   1980     Fts5SegIter *pIter,             /* Iterator to seek */
  1954   1981     const u8 *pTerm, int nTerm      /* Term to search for */
  1955   1982   ){
  1956   1983     int iOff;
  1957   1984     const u8 *a = pIter->pLeaf->p;
  1958         -  int n = pIter->pLeaf->n;
         1985  +  int szLeaf = pIter->pLeaf->szLeaf;
         1986  +  int n = pIter->pLeaf->nn;
  1959   1987   
  1960   1988     int nMatch = 0;
  1961   1989     int nKeep = 0;
  1962   1990     int nNew = 0;
         1991  +  int iTerm = 0;
         1992  +  int iTermOff;
         1993  +  int iPgidx;                     /* Current offset in pgidx */
         1994  +  int bEndOfPage = 0;
  1963   1995   
  1964   1996     assert( p->rc==SQLITE_OK );
  1965         -  assert( pIter->pLeaf );
  1966   1997   
  1967         -  iOff = fts5GetU16(&a[2]);
  1968         -  if( iOff<4 || iOff>=n ){
  1969         -    p->rc = FTS5_CORRUPT;
  1970         -    return;
  1971         -  }
         1998  +  iPgidx = szLeaf;
         1999  +  iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
         2000  +  iOff = iTermOff;
  1972   2001   
  1973   2002     while( 1 ){
  1974         -    int i;
  1975         -    int nCmp;
  1976   2003   
  1977   2004       /* Figure out how many new bytes are in this term */
  1978   2005       fts5IndexGetVarint32(a, iOff, nNew);
  1979         -
  1980   2006       if( nKeep<nMatch ){
  1981   2007         goto search_failed;
  1982   2008       }
  1983   2009   
  1984   2010       assert( nKeep>=nMatch );
  1985   2011       if( nKeep==nMatch ){
         2012  +      int nCmp;
         2013  +      int i;
  1986   2014         nCmp = MIN(nNew, nTerm-nMatch);
  1987   2015         for(i=0; i<nCmp; i++){
  1988   2016           if( a[iOff+i]!=pTerm[nMatch+i] ) break;
  1989   2017         }
  1990   2018         nMatch += i;
  1991   2019   
  1992   2020         if( nTerm==nMatch ){
................................................................................
  1995   2023           }else{
  1996   2024             goto search_failed;
  1997   2025           }
  1998   2026         }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
  1999   2027           goto search_failed;
  2000   2028         }
  2001   2029       }
  2002         -    iOff += nNew;
  2003   2030   
  2004         -    /* Skip past the doclist. If the end of the page is reached, bail out. */
  2005         -    while( 1 ){
  2006         -      int nPos;
         2031  +    if( iPgidx>=n ){
         2032  +      bEndOfPage = 1;
         2033  +      break;
         2034  +    }
  2007   2035   
  2008         -      /* Skip past rowid delta */
  2009         -      fts5IndexSkipVarint(a, iOff);
  2010         -
  2011         -      /* Skip past position list */
  2012         -      fts5IndexGetVarint32(a, iOff, nPos);
  2013         -      iOff += (nPos >> 1);
  2014         -      if( iOff>=(n-1) ){
  2015         -        iOff = n;
  2016         -        goto search_failed;
  2017         -      }
  2018         -
  2019         -      /* If this is the end of the doclist, break out of the loop */
  2020         -      if( a[iOff]==0x00 ){
  2021         -        iOff++;
  2022         -        break;
  2023         -      }
  2024         -    };
         2036  +    iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
         2037  +    iTermOff += nKeep;
         2038  +    iOff = iTermOff;
  2025   2039   
  2026   2040       /* Read the nKeep field of the next term. */
  2027   2041       fts5IndexGetVarint32(a, iOff, nKeep);
  2028   2042     }
  2029   2043   
  2030   2044    search_failed:
  2031   2045     if( bGe==0 ){
  2032   2046       fts5DataRelease(pIter->pLeaf);
  2033   2047       pIter->pLeaf = 0;
  2034   2048       return;
  2035         -  }else if( iOff>=n ){
         2049  +  }else if( bEndOfPage ){
  2036   2050       do {
         2051  +      iTerm = 0;
  2037   2052         fts5SegIterNextPage(p, pIter);
  2038   2053         if( pIter->pLeaf==0 ) return;
  2039   2054         a = pIter->pLeaf->p;
  2040         -      iOff = fts5GetU16(&a[2]);
  2041         -      if( iOff ){
  2042         -        if( iOff<4 || iOff>=n ){
         2055  +      if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
         2056  +        fts5GetVarint32(&pIter->pLeaf->p[pIter->pLeaf->szLeaf], iOff);
         2057  +        if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
  2043   2058             p->rc = FTS5_CORRUPT;
  2044   2059           }else{
  2045   2060             nKeep = 0;
  2046   2061             iOff += fts5GetVarint32(&a[iOff], nNew);
  2047   2062             break;
  2048   2063           }
  2049   2064         }
  2050   2065       }while( 1 );
  2051   2066     }
  2052   2067   
  2053   2068    search_success:
         2069  +
  2054   2070     pIter->iLeafOffset = iOff + nNew;
  2055   2071     pIter->iTermLeafOffset = pIter->iLeafOffset;
  2056   2072     pIter->iTermLeafPgno = pIter->iLeafPgno;
  2057   2073   
  2058   2074     fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
  2059   2075     fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  2060   2076   
         2077  +  if( iPgidx>=n ){
         2078  +    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
         2079  +  }else{
         2080  +    int nExtra;
         2081  +    iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
         2082  +    pIter->iEndofDoclist = iTermOff + nExtra;
         2083  +  }
         2084  +  pIter->iPgidxOff = iPgidx;
         2085  +
  2061   2086     fts5SegIterLoadRowid(p, pIter);
  2062   2087     fts5SegIterLoadNPos(p, pIter);
  2063   2088   }
  2064   2089   
  2065   2090   /*
  2066   2091   ** Initialize the object pIter to point to term pTerm/nTerm within segment
  2067   2092   ** pSeg. If there is no such term in the index, the iterator is set to EOF.
................................................................................
  2186   2211   
  2187   2212     if( pList ){
  2188   2213       Fts5Data *pLeaf;
  2189   2214       sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
  2190   2215       pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
  2191   2216       if( pLeaf==0 ) return;
  2192   2217       pLeaf->p = (u8*)pList;
  2193         -    pLeaf->n = nList;
         2218  +    pLeaf->nn = pLeaf->szLeaf = nList;
  2194   2219       pIter->pLeaf = pLeaf;
  2195   2220       pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
         2221  +    pIter->iEndofDoclist = pLeaf->nn+1;
  2196   2222   
  2197   2223       if( flags & FTS5INDEX_QUERY_DESC ){
  2198   2224         pIter->flags |= FTS5_SEGITER_REVERSE;
  2199   2225         fts5SegIterReverseInitPage(p, pIter);
  2200   2226       }else{
  2201   2227         fts5SegIterLoadNPos(p, pIter);
  2202   2228       }
................................................................................
  2379   2405       pIter->iLeafPgno = iLeafPgno-1;
  2380   2406       fts5SegIterNextPage(p, pIter);
  2381   2407       assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );
  2382   2408   
  2383   2409       if( p->rc==SQLITE_OK ){
  2384   2410         int iOff;
  2385   2411         u8 *a = pIter->pLeaf->p;
  2386         -      int n = pIter->pLeaf->n;
         2412  +      int n = pIter->pLeaf->szLeaf;
  2387   2413   
  2388         -      iOff = fts5GetU16(&a[0]);
         2414  +      iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
  2389   2415         if( iOff<4 || iOff>=n ){
  2390   2416           p->rc = FTS5_CORRUPT;
  2391   2417         }else{
  2392   2418           iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
  2393   2419           pIter->iLeafOffset = iOff;
  2394   2420           fts5SegIterLoadNPos(p, pIter);
  2395   2421         }
................................................................................
  2713   2739   ){
  2714   2740     Fts5IndexIter *pNew;
  2715   2741     pNew = fts5MultiIterAlloc(p, 2);
  2716   2742     if( pNew ){
  2717   2743       Fts5SegIter *pIter = &pNew->aSeg[1];
  2718   2744   
  2719   2745       pIter->flags = FTS5_SEGITER_ONETERM;
  2720         -    if( pData->n>0 ){
         2746  +    if( pData->szLeaf>0 ){
  2721   2747         pIter->pLeaf = pData;
  2722   2748         pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
         2749  +      pIter->iEndofDoclist = pData->nn;
  2723   2750         pNew->aFirst[1].iFirst = 1;
  2724   2751         if( bDesc ){
  2725   2752           pNew->bRev = 1;
  2726   2753           pIter->flags |= FTS5_SEGITER_REVERSE;
  2727   2754           fts5SegIterReverseInitPage(p, pIter);
  2728   2755         }else{
  2729   2756           fts5SegIterLoadNPos(p, pIter);
................................................................................
  2793   2820     Fts5SegIter *pSeg,              /* Poslist of this iterator */
  2794   2821     void *pCtx,                     /* Context pointer for xChunk callback */
  2795   2822     void (*xChunk)(Fts5Index*, void*, const u8*, int)
  2796   2823   ){
  2797   2824     int nRem = pSeg->nPos;          /* Number of bytes still to come */
  2798   2825     Fts5Data *pData = 0;
  2799   2826     u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  2800         -  int nChunk = MIN(nRem, pSeg->pLeaf->n - pSeg->iLeafOffset);
         2827  +  int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
  2801   2828     int pgno = pSeg->iLeafPgno;
  2802   2829     int pgnoSave = 0;
  2803   2830   
  2804   2831     if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
  2805   2832       pgnoSave = pgno+1;
  2806   2833     }
  2807   2834   
................................................................................
  2809   2836       xChunk(p, pCtx, pChunk, nChunk);
  2810   2837       nRem -= nChunk;
  2811   2838       fts5DataRelease(pData);
  2812   2839       if( nRem<=0 ){
  2813   2840         break;
  2814   2841       }else{
  2815   2842         pgno++;
  2816         -      pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, 0, pgno));
         2843  +      pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
  2817   2844         if( pData==0 ) break;
  2818   2845         pChunk = &pData->p[4];
  2819         -      nChunk = MIN(nRem, pData->n - 4);
         2846  +      nChunk = MIN(nRem, pData->szLeaf - 4);
  2820   2847         if( pgno==pgnoSave ){
  2821   2848           assert( pSeg->pNextLeaf==0 );
  2822   2849           pSeg->pNextLeaf = pData;
  2823   2850           pData = 0;
  2824   2851         }
  2825   2852       }
  2826   2853     }
................................................................................
  3098   3125   }
  3099   3126   
  3100   3127   static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  3101   3128     static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
  3102   3129     Fts5PageWriter *pPage = &pWriter->writer;
  3103   3130     i64 iRowid;
  3104   3131   
         3132  +  assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );
         3133  +
         3134  +  /* Set the szLeaf header field. */
         3135  +  assert( 0==fts5GetU16(&pPage->buf.p[2]) );
         3136  +  fts5PutU16(&pPage->buf.p[2], pPage->buf.n);
         3137  +
  3105   3138     if( pWriter->bFirstTermInPage ){
  3106   3139       /* No term was written to this page. */
  3107         -    assert( 0==fts5GetU16(&pPage->buf.p[2]) );
         3140  +    assert( pPage->pgidx.n==0 );
  3108   3141       fts5WriteBtreeNoTerm(p, pWriter);
         3142  +  }else{
         3143  +    /* Append the pgidx to the page buffer. Set the szLeaf header field. */
         3144  +    fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
  3109   3145     }
  3110   3146   
  3111         -  /* Write the current page to the db. */
  3112         -  iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, 0, pPage->pgno);
         3147  +  /* Write the page out to disk */
         3148  +  iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
  3113   3149     fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
  3114   3150   
  3115   3151     /* Initialize the next page. */
  3116   3152     fts5BufferZero(&pPage->buf);
         3153  +  fts5BufferZero(&pPage->pgidx);
  3117   3154     fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
         3155  +  pPage->iPrevPgidx = 0;
  3118   3156     pPage->pgno++;
  3119   3157   
  3120   3158     /* Increase the leaves written counter */
  3121   3159     pWriter->nLeafWritten++;
  3122   3160   
  3123   3161     /* The new leaf holds no terms or rowids */
  3124   3162     pWriter->bFirstTermInPage = 1;
................................................................................
  3135   3173   static void fts5WriteAppendTerm(
  3136   3174     Fts5Index *p, 
  3137   3175     Fts5SegWriter *pWriter,
  3138   3176     int nTerm, const u8 *pTerm 
  3139   3177   ){
  3140   3178     int nPrefix;                    /* Bytes of prefix compression for term */
  3141   3179     Fts5PageWriter *pPage = &pWriter->writer;
         3180  +  Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
  3142   3181   
  3143         -  assert( pPage->buf.n==0 || pPage->buf.n>4 );
  3144         -  if( pPage->buf.n==0 ){
  3145         -    /* Zero the first term and first rowid fields */
  3146         -    static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
  3147         -    fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
  3148         -    assert( pWriter->bFirstTermInPage );
  3149         -  }
  3150   3182     if( p->rc ) return;
         3183  +  assert( pPage->buf.n>=4 );
         3184  +  assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );
         3185  +
         3186  +  /* If the current leaf page is full, flush it to disk. */
         3187  +  if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
         3188  +    if( pPage->buf.n>4 ){
         3189  +      fts5WriteFlushLeaf(p, pWriter);
         3190  +    }
         3191  +    fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
         3192  +  }
  3151   3193     
         3194  +  /* TODO1: Updating pgidx here. */
         3195  +  pPgidx->n += sqlite3Fts5PutVarint(
         3196  +      &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
         3197  +  );
         3198  +  pPage->iPrevPgidx = pPage->buf.n;
         3199  +#if 0
         3200  +  fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
         3201  +  pPgidx->n += 2;
         3202  +#endif
         3203  +
  3152   3204     if( pWriter->bFirstTermInPage ){
  3153         -    /* Update the "first term" field of the page header. */
  3154         -    assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 );
  3155         -    fts5PutU16(&pPage->buf.p[2], pPage->buf.n);
  3156   3205       nPrefix = 0;
  3157   3206       if( pPage->pgno!=1 ){
  3158   3207         /* This is the first term on a leaf that is not the leftmost leaf in
  3159   3208         ** the segment b-tree. In this case it is necessary to add a term to
  3160   3209         ** the b-tree hierarchy that is (a) larger than the largest term 
  3161   3210         ** already written to the segment and (b) smaller than or equal to
  3162   3211         ** this term. In other words, a prefix of (pTerm/nTerm) that is one
................................................................................
  3190   3239     pWriter->bFirstTermInPage = 0;
  3191   3240   
  3192   3241     pWriter->bFirstRowidInPage = 0;
  3193   3242     pWriter->bFirstRowidInDoclist = 1;
  3194   3243   
  3195   3244     assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
  3196   3245     pWriter->aDlidx[0].pgno = pPage->pgno;
  3197         -
  3198         -  /* If the current leaf page is full, flush it to disk. */
  3199         -  if( pPage->buf.n>=p->pConfig->pgsz ){
  3200         -    fts5WriteFlushLeaf(p, pWriter);
  3201         -  }
  3202   3246   }
  3203   3247   
  3204   3248   /*
  3205   3249   ** Append a rowid and position-list size field to the writers output. 
  3206   3250   */
  3207   3251   static void fts5WriteAppendRowid(
  3208   3252     Fts5Index *p, 
  3209   3253     Fts5SegWriter *pWriter,
  3210   3254     i64 iRowid,
  3211   3255     int nPos
  3212   3256   ){
  3213   3257     if( p->rc==SQLITE_OK ){
  3214   3258       Fts5PageWriter *pPage = &pWriter->writer;
         3259  +
         3260  +    if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
         3261  +      fts5WriteFlushLeaf(p, pWriter);
         3262  +    }
  3215   3263   
  3216   3264       /* If this is to be the first rowid written to the page, set the 
  3217   3265       ** rowid-pointer in the page-header. Also append a value to the dlidx
  3218   3266       ** buffer, in case a doclist-index is required.  */
  3219   3267       if( pWriter->bFirstRowidInPage ){
  3220   3268         fts5PutU16(pPage->buf.p, pPage->buf.n);
  3221   3269         fts5WriteDlidxAppend(p, pWriter, iRowid);
................................................................................
  3229   3277         fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid);
  3230   3278       }
  3231   3279       pWriter->iPrevRowid = iRowid;
  3232   3280       pWriter->bFirstRowidInDoclist = 0;
  3233   3281       pWriter->bFirstRowidInPage = 0;
  3234   3282   
  3235   3283       fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos);
  3236         -
  3237         -    if( pPage->buf.n>=p->pConfig->pgsz ){
  3238         -      fts5WriteFlushLeaf(p, pWriter);
  3239         -    }
  3240   3284     }
  3241   3285   }
  3242   3286   
  3243   3287   static void fts5WriteAppendPoslistData(
  3244   3288     Fts5Index *p, 
  3245   3289     Fts5SegWriter *pWriter, 
  3246   3290     const u8 *aData, 
................................................................................
  3247   3291     int nData
  3248   3292   ){
  3249   3293     Fts5PageWriter *pPage = &pWriter->writer;
  3250   3294     const u8 *a = aData;
  3251   3295     int n = nData;
  3252   3296     
  3253   3297     assert( p->pConfig->pgsz>0 );
  3254         -  while( p->rc==SQLITE_OK && (pPage->buf.n + n)>=p->pConfig->pgsz ){
  3255         -    int nReq = p->pConfig->pgsz - pPage->buf.n;
         3298  +  while( p->rc==SQLITE_OK 
         3299  +     && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz 
         3300  +  ){
         3301  +    int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
  3256   3302       int nCopy = 0;
  3257   3303       while( nCopy<nReq ){
  3258   3304         i64 dummy;
  3259   3305         nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
  3260   3306       }
  3261   3307       fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
  3262   3308       a += nCopy;
................................................................................
  3275   3321   /*
  3276   3322   ** Flush any data cached by the writer object to the database. Free any
  3277   3323   ** allocations associated with the writer.
  3278   3324   */
  3279   3325   static void fts5WriteFinish(
  3280   3326     Fts5Index *p, 
  3281   3327     Fts5SegWriter *pWriter,         /* Writer object */
  3282         -  int *pnHeight,                  /* OUT: Height of the b-tree */
  3283   3328     int *pnLeaf                     /* OUT: Number of leaf pages in b-tree */
  3284   3329   ){
  3285   3330     int i;
  3286   3331     Fts5PageWriter *pLeaf = &pWriter->writer;
  3287   3332     if( p->rc==SQLITE_OK ){
  3288   3333       if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){
  3289   3334         *pnLeaf = 0;
  3290         -      *pnHeight = 0;
  3291   3335       }else{
  3292   3336         if( pLeaf->buf.n>4 ){
  3293   3337           fts5WriteFlushLeaf(p, pWriter);
  3294   3338         }
  3295   3339         *pnLeaf = pLeaf->pgno-1;
  3296   3340   
  3297   3341         fts5WriteFlushBtree(p, pWriter);
  3298         -      *pnHeight = 0;
  3299   3342       }
  3300   3343     }
  3301   3344     fts5BufferFree(&pLeaf->term);
  3302   3345     fts5BufferFree(&pLeaf->buf);
         3346  +  fts5BufferFree(&pLeaf->pgidx);
  3303   3347     fts5BufferFree(&pWriter->btterm);
  3304   3348   
  3305   3349     for(i=0; i<pWriter->nDlidx; i++){
  3306   3350       sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
  3307   3351     }
  3308   3352     sqlite3_free(pWriter->aDlidx);
  3309   3353   }
  3310   3354   
  3311   3355   static void fts5WriteInit(
  3312   3356     Fts5Index *p, 
  3313   3357     Fts5SegWriter *pWriter, 
  3314   3358     int iSegid
  3315   3359   ){
         3360  +  const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;
         3361  +
  3316   3362     memset(pWriter, 0, sizeof(Fts5SegWriter));
  3317   3363     pWriter->iSegid = iSegid;
  3318   3364   
  3319   3365     fts5WriteDlidxGrow(p, pWriter, 1);
  3320   3366     pWriter->writer.pgno = 1;
  3321   3367     pWriter->bFirstTermInPage = 1;
  3322   3368     pWriter->iBtPage = 1;
         3369  +
         3370  +  /* Grow the two buffers to pgsz + padding bytes in size. */
         3371  +  fts5BufferGrow(&p->rc, &pWriter->writer.pgidx, nBuffer);
         3372  +  fts5BufferGrow(&p->rc, &pWriter->writer.buf, nBuffer);
  3323   3373   
  3324   3374     if( p->pIdxWriter==0 ){
  3325   3375       Fts5Config *pConfig = p->pConfig;
  3326   3376       fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
  3327   3377             "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)", 
  3328   3378             pConfig->zDb, pConfig->zName
  3329   3379       ));
  3330   3380     }
  3331   3381   
  3332   3382     if( p->rc==SQLITE_OK ){
         3383  +    /* Initialize the 4-byte leaf-page header to 0x00. */
         3384  +    memset(pWriter->writer.buf.p, 0, 4);
         3385  +    pWriter->writer.buf.n = 4;
         3386  +
         3387  +    /* Bind the current output segment id to the index-writer. This is an
         3388  +    ** optimization over binding the same value over and over as rows are
         3389  +    ** inserted into %_idx by the current writer.  */
  3333   3390       sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
  3334   3391     }
  3335   3392   }
  3336   3393   
  3337   3394   /*
  3338   3395   ** Iterator pIter was used to iterate through the input segments of an
  3339   3396   ** incremental merge operation. This function is called if the incremental
................................................................................
  3354   3411         pSeg->pSeg->pgnoLast = 0;
  3355   3412         pSeg->pSeg->pgnoFirst = 0;
  3356   3413       }else{
  3357   3414         int iOff = pSeg->iTermLeafOffset;     /* Offset on new first leaf page */
  3358   3415         i64 iLeafRowid;
  3359   3416         Fts5Data *pData;
  3360   3417         int iId = pSeg->pSeg->iSegid;
  3361         -      u8 aHdr[4] = {0x00, 0x00, 0x00, 0x04};
         3418  +      u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
  3362   3419   
  3363         -      iLeafRowid = FTS5_SEGMENT_ROWID(iId, 0, pSeg->iTermLeafPgno);
         3420  +      iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
  3364   3421         pData = fts5DataRead(p, iLeafRowid);
  3365   3422         if( pData ){
  3366   3423           fts5BufferZero(&buf);
         3424  +        fts5BufferGrow(&p->rc, &buf, pData->nn);
  3367   3425           fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
  3368   3426           fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
  3369   3427           fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
  3370         -        fts5BufferAppendBlob(&p->rc, &buf, pData->n - iOff, &pData->p[iOff]);
         3428  +        fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]);
         3429  +        if( p->rc==SQLITE_OK ){
         3430  +          /* Set the szLeaf field */
         3431  +          fts5PutU16(&buf.p[2], buf.n);
         3432  +        }
         3433  +
         3434  +        /* Set up the new page-index array */
         3435  +        fts5BufferAppendVarint(&p->rc, &buf, 4);
         3436  +        if( pSeg->iLeafPgno==pSeg->iTermLeafPgno 
         3437  +         && pSeg->iEndofDoclist<pData->szLeaf 
         3438  +        ){
         3439  +          int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
         3440  +          fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
         3441  +          fts5BufferAppendBlob(&p->rc, &buf, 
         3442  +              pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
         3443  +          );
         3444  +        }
         3445  +
  3371   3446           fts5DataRelease(pData);
  3372   3447           pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
  3373         -        fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1), iLeafRowid);
         3448  +        fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
  3374   3449           fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
  3375   3450         }
  3376   3451       }
  3377   3452     }
  3378   3453     fts5BufferFree(&buf);
  3379   3454   }
  3380   3455   
................................................................................
  3466   3541       pTerm = fts5MultiIterTerm(pIter, &nTerm);
  3467   3542       if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
  3468   3543         if( pnRem && writer.nLeafWritten>nRem ){
  3469   3544           break;
  3470   3545         }
  3471   3546   
  3472   3547         /* This is a new term. Append a term to the output segment. */
         3548  +      /* TODO2: Doclist 0x00 term */
  3473   3549         if( bRequireDoclistTerm ){
  3474         -        fts5WriteAppendZerobyte(p, &writer);
         3550  +        /* fts5WriteAppendZerobyte(p, &writer); */
  3475   3551         }
  3476   3552         fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
  3477   3553         fts5BufferSet(&p->rc, &term, nTerm, pTerm);
  3478   3554         bRequireDoclistTerm = 1;
  3479   3555       }
  3480   3556   
  3481   3557       /* Append the rowid to the output */
................................................................................
  3485   3561   
  3486   3562       /* Append the position-list data to the output */
  3487   3563       fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
  3488   3564     }
  3489   3565   
  3490   3566     /* Flush the last leaf page to disk. Set the output segment b-tree height
  3491   3567     ** and last leaf page number at the same time.  */
  3492         -  fts5WriteFinish(p, &writer, &pSeg->nHeight, &pSeg->pgnoLast);
         3568  +  fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
  3493   3569   
  3494   3570     if( fts5MultiIterEof(p, pIter) ){
  3495   3571       int i;
  3496   3572   
  3497   3573       /* Remove the redundant segments from the %_data table */
  3498   3574       for(i=0; i<nInput; i++){
  3499   3575         fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
................................................................................
  3610   3686     const int nCrisis = p->pConfig->nCrisisMerge;
  3611   3687     Fts5Structure *pStruct = *ppStruct;
  3612   3688     int iLvl = 0;
  3613   3689   
  3614   3690     assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 );
  3615   3691     while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
  3616   3692       fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
         3693  +    assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
  3617   3694       fts5StructurePromote(p, iLvl+1, pStruct);
  3618   3695       iLvl++;
  3619   3696     }
  3620   3697     *ppStruct = pStruct;
  3621   3698   }
  3622   3699   
  3623   3700   static int fts5IndexReturn(Fts5Index *p){
................................................................................
  3637   3714   ** in a 32-bit integer. Return the size of the largest prefix of this 
  3638   3715   ** list nMax bytes or less in size.
  3639   3716   */
  3640   3717   static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
  3641   3718     int ret;
  3642   3719     u32 dummy;
  3643   3720     ret = fts5GetVarint32(aBuf, dummy);
  3644         -  while( 1 ){
  3645         -    int i = fts5GetVarint32(&aBuf[ret], dummy);
  3646         -    if( (ret + i) > nMax ) break;
  3647         -    ret += i;
         3721  +  if( ret<nMax ){
         3722  +    while( 1 ){
         3723  +      int i = fts5GetVarint32(&aBuf[ret], dummy);
         3724  +      if( (ret + i) > nMax ) break;
         3725  +      ret += i;
         3726  +    }
  3648   3727     }
  3649   3728     return ret;
  3650   3729   }
  3651   3730   
  3652   3731   #define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \
  3653   3732     assert( pBuf->nSpace>=(pBuf->n+nBlob) );             \
  3654   3733     memcpy(&pBuf->p[pBuf->n], pBlob, nBlob);             \
................................................................................
  3673   3752     pStruct = fts5StructureRead(p);
  3674   3753     iSegid = fts5AllocateSegid(p, pStruct);
  3675   3754   
  3676   3755     if( iSegid ){
  3677   3756       const int pgsz = p->pConfig->pgsz;
  3678   3757   
  3679   3758       Fts5StructureSegment *pSeg;   /* New segment within pStruct */
  3680         -    int nHeight;                  /* Height of new segment b-tree */
  3681   3759       Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */
         3760  +    Fts5Buffer *pPgidx;           /* Buffer in which to assemble pgidx */
  3682   3761       const u8 *zPrev = 0;
  3683   3762   
  3684   3763       Fts5SegWriter writer;
  3685   3764       fts5WriteInit(p, &writer, iSegid);
  3686   3765   
  3687         -    /* Pre-allocate the buffer used to assemble leaf pages to the target
  3688         -    ** page size.  */
  3689         -    assert( pgsz>0 );
  3690   3766       pBuf = &writer.writer.buf;
  3691         -    fts5BufferGrow(&p->rc, pBuf, pgsz + 20);
         3767  +    pPgidx = &writer.writer.pgidx;
         3768  +
         3769  +    /* fts5WriteInit() should have initialized the buffers to (most likely)
         3770  +    ** the maximum space required. */
         3771  +    assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
         3772  +    assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );
  3692   3773   
  3693   3774       /* Begin scanning through hash table entries. This loop runs once for each
  3694   3775       ** term/doclist currently stored within the hash table. */
  3695   3776       if( p->rc==SQLITE_OK ){
  3696         -      memset(pBuf->p, 0, 4);
  3697         -      pBuf->n = 4;
  3698   3777         p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
  3699   3778       }
  3700   3779       while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
  3701   3780         const char *zTerm;          /* Buffer containing term */
  3702         -      int nTerm;                  /* Size of zTerm in bytes */
  3703   3781         const u8 *pDoclist;         /* Pointer to doclist for this term */
  3704   3782         int nDoclist;               /* Size of doclist in bytes */
  3705   3783         int nSuffix;                /* Size of term suffix */
  3706   3784   
         3785  +      /* Write the term for this entry to disk. */
  3707   3786         sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
  3708         -      nTerm = strlen(zTerm);
         3787  +      fts5WriteAppendTerm(p, &writer, strlen(zTerm), zTerm);
  3709   3788   
  3710         -      /* Decide if the term will fit on the current leaf. If it will not, 
  3711         -      ** flush the leaf to disk here.  */
  3712         -      if( pBuf->n>4 && (pBuf->n + nTerm + 2) > pgsz ){
  3713         -        fts5WriteFlushLeaf(p, &writer);
  3714         -        pBuf = &writer.writer.buf;
  3715         -        if( (nTerm + 32) > pBuf->nSpace ){
  3716         -          fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
  3717         -          if( p->rc ) break;
  3718         -        }
  3719         -      }
  3720         -
  3721         -      /* Write the term to the leaf. And if it is the first on the leaf, and
  3722         -      ** the leaf is not page number 1, push it up into the b-tree hierarchy 
  3723         -      ** as well.  */
  3724         -      if( writer.bFirstTermInPage==0 ){
  3725         -        int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm);
  3726         -        pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nPre);
  3727         -        nSuffix = nTerm - nPre;
  3728         -      }else{
  3729         -        fts5PutU16(&pBuf->p[2], pBuf->n);
  3730         -        writer.bFirstTermInPage = 0;
  3731         -        if( writer.writer.pgno!=1 ){
  3732         -          int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm);
  3733         -          fts5WriteBtreeTerm(p, &writer, nPre+1, (const u8*)zTerm);
  3734         -          pBuf = &writer.writer.buf;
  3735         -          assert( nPre<nTerm );
  3736         -        }
  3737         -        nSuffix = nTerm;
  3738         -      }
  3739         -      pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nSuffix);
  3740         -      fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix);
  3741         -
  3742         -      /* We just wrote a term into page writer.aWriter[0].pgno. If a 
  3743         -      ** doclist-index is to be generated for this doclist, it will be
  3744         -      ** associated with this page. */
  3745         -      assert( writer.nDlidx>0 && writer.aDlidx[0].buf.n==0 );
  3746         -      writer.aDlidx[0].pgno = writer.writer.pgno;
  3747         -
  3748         -      if( pgsz>=(pBuf->n + nDoclist + 1) ){
         3789  +      if( writer.bFirstRowidInPage==0 
         3790  +       && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) 
         3791  +      ){
  3749   3792           /* The entire doclist will fit on the current leaf. */
  3750   3793           fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
  3751   3794         }else{
  3752   3795           i64 iRowid = 0;
  3753   3796           i64 iDelta = 0;
  3754   3797           int iOff = 0;
  3755   3798   
  3756         -        writer.bFirstRowidInPage = 0;
         3799  +        /*  writer.bFirstRowidInPage = 0; */
  3757   3800   
  3758   3801           /* The entire doclist will not fit on this leaf. The following 
  3759   3802           ** loop iterates through the poslists that make up the current 
  3760   3803           ** doclist.  */
  3761   3804           while( p->rc==SQLITE_OK && iOff<nDoclist ){
  3762   3805             int nPos;
  3763   3806             int nCopy;
................................................................................
  3773   3816               writer.bFirstRowidInPage = 0;
  3774   3817               fts5WriteDlidxAppend(p, &writer, iRowid);
  3775   3818             }else{
  3776   3819               pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
  3777   3820             }
  3778   3821             assert( pBuf->n<=pBuf->nSpace );
  3779   3822   
  3780         -          if( (pBuf->n + nCopy) <= pgsz ){
         3823  +          if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
  3781   3824               /* The entire poslist will fit on the current leaf. So copy
  3782   3825               ** it in one go. */
  3783   3826               fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
  3784   3827             }else{
  3785   3828               /* The entire poslist will not fit on this leaf. So it needs
  3786   3829               ** to be broken into sections. The only qualification being
  3787   3830               ** that each varint must be stored contiguously.  */
  3788   3831               const u8 *pPoslist = &pDoclist[iOff];
  3789   3832               int iPos = 0;
  3790   3833               while( p->rc==SQLITE_OK ){
  3791         -              int nSpace = pgsz - pBuf->n;
         3834  +              int nSpace = pgsz - pBuf->n - pPgidx->n;
  3792   3835                 int n = 0;
  3793   3836                 if( (nCopy - iPos)<=nSpace ){
  3794   3837                   n = nCopy - iPos;
  3795   3838                 }else{
  3796   3839                   n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
  3797   3840                 }
  3798   3841                 assert( n>0 );
  3799   3842                 fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
  3800   3843                 iPos += n;
  3801         -              if( pBuf->n>=pgsz ){
         3844  +              if( (pBuf->n + pPgidx->n)>=pgsz ){
  3802   3845                   fts5WriteFlushLeaf(p, &writer);
  3803         -                pBuf = &writer.writer.buf;
  3804   3846                 }
  3805   3847                 if( iPos>=nCopy ) break;
  3806   3848               }
  3807   3849             }
  3808   3850             iOff += nCopy;
  3809   3851           }
  3810   3852         }
  3811   3853   
  3812         -      pBuf->p[pBuf->n++] = '\0';
         3854  +      /* TODO2: Doclist terminator written here. */
         3855  +      /* pBuf->p[pBuf->n++] = '\0'; */
  3813   3856         assert( pBuf->n<=pBuf->nSpace );
  3814   3857         zPrev = (const u8*)zTerm;
  3815   3858         sqlite3Fts5HashScanNext(pHash);
  3816   3859       }
  3817   3860       sqlite3Fts5HashClear(pHash);
  3818         -    fts5WriteFinish(p, &writer, &nHeight, &pgnoLast);
         3861  +    fts5WriteFinish(p, &writer, &pgnoLast);
  3819   3862   
  3820   3863       /* Update the Fts5Structure. It is written back to the database by the
  3821   3864       ** fts5StructureRelease() call below.  */
  3822   3865       if( pStruct->nLevel==0 ){
  3823   3866         fts5StructureAddLevel(&p->rc, &pStruct);
  3824   3867       }
  3825   3868       fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
  3826   3869       if( p->rc==SQLITE_OK ){
  3827   3870         pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
  3828   3871         pSeg->iSegid = iSegid;
  3829         -      pSeg->nHeight = nHeight;
  3830   3872         pSeg->pgnoFirst = 1;
  3831   3873         pSeg->pgnoLast = pgnoLast;
  3832   3874         pStruct->nSegment++;
  3833   3875       }
  3834   3876       fts5StructurePromote(p, 0, pStruct);
  3835   3877     }
  3836   3878   
................................................................................
  3924   3966   }
  3925   3967   
  3926   3968   static void fts5PoslistCallback(
  3927   3969     Fts5Index *p, 
  3928   3970     void *pCtx, 
  3929   3971     const u8 *pChunk, int nChunk
  3930   3972   ){
  3931         -  fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk);
         3973  +  assert_nc( nChunk>=0 );
         3974  +  if( nChunk>0 ){
         3975  +    fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk);
         3976  +  }
  3932   3977   }
  3933   3978   
  3934   3979   /*
  3935   3980   ** Iterator pIter currently points to a valid entry (not EOF). This
  3936   3981   ** function appends the position list data for the current entry to
  3937   3982   ** buffer pBuf. It does not make a copy of the position-list size
  3938   3983   ** field.
................................................................................
  4159   4204         fts5BufferFree(&aBuf[i]);
  4160   4205       }
  4161   4206       fts5MultiIterFree(p, p1);
  4162   4207   
  4163   4208       pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n);
  4164   4209       if( pData ){
  4165   4210         pData->p = (u8*)&pData[1];
  4166         -      pData->n = doclist.n;
         4211  +      pData->nn = pData->szLeaf = doclist.n;
  4167   4212         memcpy(pData->p, doclist.p, doclist.n);
  4168   4213         fts5MultiIterNew2(p, pData, bDesc, ppIter);
  4169   4214       }
  4170   4215       fts5BufferFree(&doclist);
  4171   4216     }
  4172   4217   
  4173   4218     fts5StructureRelease(pStruct);
................................................................................
  4389   4434          || (flags & FTS5INDEX_QUERY_SCAN)==FTS5INDEX_QUERY_SCAN
  4390   4435     );
  4391   4436   
  4392   4437     if( sqlite3Fts5BufferGrow(&p->rc, &buf, nToken+1)==0 ){
  4393   4438       memcpy(&buf.p[1], pToken, nToken);
  4394   4439   
  4395   4440   #ifdef SQLITE_DEBUG
  4396         -    if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){
         4441  +    /* If the QUERY_TEST_NOIDX flag was specified, then this must be a
         4442  +    ** prefix-query. Instead of using a prefix-index (if one exists), 
         4443  +    ** evaluate the prefix query using the main FTS index. This is used
         4444  +    ** for internal sanity checking by the integrity-check in debug 
         4445  +    ** mode only.  */
         4446  +    if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
  4397   4447         assert( flags & FTS5INDEX_QUERY_PREFIX );
  4398   4448         iIdx = 1+pConfig->nPrefix;
  4399   4449       }else
  4400   4450   #endif
  4401   4451       if( flags & FTS5INDEX_QUERY_PREFIX ){
  4402   4452         int nChar = fts5IndexCharlen(pToken, nToken);
  4403   4453         for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
................................................................................
  4509   4559     int *pn,                        /* OUT: Size of position-list in bytes */
  4510   4560     i64 *piRowid                    /* OUT: Current rowid */
  4511   4561   ){
  4512   4562     Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  4513   4563     assert( pIter->pIndex->rc==SQLITE_OK );
  4514   4564     *piRowid = pSeg->iRowid;
  4515   4565     *pn = pSeg->nPos;
  4516         -  if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->n ){
         4566  +  if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->szLeaf ){
  4517   4567       *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  4518   4568     }else{
  4519   4569       fts5BufferZero(&pIter->poslist);
  4520   4570       fts5SegiterPoslist(pIter->pIndex, pSeg, &pIter->poslist);
  4521   4571       *pp = pIter->poslist.p;
  4522   4572     }
  4523   4573     return fts5IndexReturn(pIter->pIndex);
................................................................................
  4557   4607   int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
  4558   4608     int nCol = p->pConfig->nCol;
  4559   4609     Fts5Data *pData;
  4560   4610   
  4561   4611     *pnRow = 0;
  4562   4612     memset(anSize, 0, sizeof(i64) * nCol);
  4563   4613     pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
  4564         -  if( p->rc==SQLITE_OK && pData->n ){
         4614  +  if( p->rc==SQLITE_OK && pData->nn ){
  4565   4615       int i = 0;
  4566   4616       int iCol;
  4567   4617       i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
  4568         -    for(iCol=0; i<pData->n && iCol<nCol; iCol++){
         4618  +    for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
  4569   4619         i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
  4570   4620       }
  4571   4621     }
  4572   4622   
  4573   4623     fts5DataRelease(pData);
  4574   4624     return fts5IndexReturn(p);
  4575   4625   }
................................................................................
  4766   4816       if( rc==SQLITE_OK ){
  4767   4817         int f = flags|FTS5INDEX_QUERY_DESC;
  4768   4818         rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
  4769   4819       }
  4770   4820       if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
  4771   4821   
  4772   4822       /* If this is a prefix query, check that the results returned if the
  4773         -    ** the index is disabled are the same. In both ASC and DESC order. */
  4774         -    if( iIdx>0 && rc==SQLITE_OK ){
  4775         -      int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
  4776         -      ck2 = 0;
  4777         -      rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
  4778         -      if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
  4779         -    }
  4780         -    if( iIdx>0 && rc==SQLITE_OK ){
  4781         -      int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
  4782         -      ck2 = 0;
  4783         -      rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
  4784         -      if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
         4823  +    ** index is disabled are the same. In both ASC and DESC order. 
         4824  +    **
         4825  +    ** This check may only be performed if the hash table is empty. This
         4826  +    ** is because the hash table only supports a single scan query at
         4827  +    ** a time, and the multi-iter loop from which this function is called
         4828  +    ** is already performing such a scan. */
         4829  +    if( p->nPendingData==0 ){
         4830  +      if( iIdx>0 && rc==SQLITE_OK ){
         4831  +        int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
         4832  +        ck2 = 0;
         4833  +        rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
         4834  +        if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
         4835  +      }
         4836  +      if( iIdx>0 && rc==SQLITE_OK ){
         4837  +        int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
         4838  +        ck2 = 0;
         4839  +        rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
         4840  +        if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
         4841  +      }
  4785   4842       }
  4786   4843   
  4787   4844       cksum3 ^= ck1;
  4788   4845       fts5BufferSet(&rc, pPrev, n, (const u8*)z);
  4789   4846   
  4790   4847       if( rc==SQLITE_OK && cksum3!=expected ){
  4791   4848         rc = FTS5_CORRUPT;
................................................................................
  4816   4873     int iLast
  4817   4874   ){
  4818   4875     int i;
  4819   4876   
  4820   4877     /* Now check that the iter.nEmpty leaves following the current leaf
  4821   4878     ** (a) exist and (b) contain no terms. */
  4822   4879     for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
  4823         -    Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, i));
         4880  +    Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
  4824   4881       if( pLeaf ){
  4825         -      if( 0!=fts5GetU16(&pLeaf->p[2]) ) p->rc = FTS5_CORRUPT;
  4826         -      if( i>=iNoRowid && 0!=fts5GetU16(&pLeaf->p[0]) ) p->rc = FTS5_CORRUPT;
         4882  +      if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
         4883  +      if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
  4827   4884       }
  4828   4885       fts5DataRelease(pLeaf);
  4829   4886       if( p->rc ) break;
  4830   4887     }
  4831   4888   }
         4889  +
         4890  +static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
         4891  +  int nPg = (pLeaf->nn - pLeaf->szLeaf) / 2;
         4892  +  int iTermOff = 0;
         4893  +  int ii;
         4894  +
         4895  +  Fts5Buffer buf1 = {0,0,0};
         4896  +  Fts5Buffer buf2 = {0,0,0};
         4897  +
         4898  +  ii = pLeaf->szLeaf;
         4899  +  while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
         4900  +    int res;
         4901  +    int iOff;
         4902  +    int nIncr;
         4903  +
         4904  +    ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
         4905  +    iTermOff += nIncr;
         4906  +    iOff = iTermOff;
         4907  +
         4908  +    if( iOff>=pLeaf->szLeaf ){
         4909  +      p->rc = FTS5_CORRUPT;
         4910  +    }else if( iTermOff==nIncr ){
         4911  +      int nByte;
         4912  +      iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
         4913  +      if( (iOff+nByte)>pLeaf->szLeaf ){
         4914  +        p->rc = FTS5_CORRUPT;
         4915  +      }else{
         4916  +        fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
         4917  +      }
         4918  +    }else{
         4919  +      int nKeep, nByte;
         4920  +      iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
         4921  +      iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
         4922  +      if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
         4923  +        p->rc = FTS5_CORRUPT;
         4924  +      }else{
         4925  +        buf1.n = nKeep;
         4926  +        fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
         4927  +      }
         4928  +
         4929  +      if( p->rc==SQLITE_OK ){
         4930  +        res = fts5BufferCompare(&buf1, &buf2);
         4931  +        if( res<=0 ) p->rc = FTS5_CORRUPT;
         4932  +      }
         4933  +    }
         4934  +    fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
         4935  +  }
         4936  +
         4937  +  fts5BufferFree(&buf1);
         4938  +  fts5BufferFree(&buf2);
         4939  +}
  4832   4940   
  4833   4941   static void fts5IndexIntegrityCheckSegment(
  4834   4942     Fts5Index *p,                   /* FTS5 backend object */
  4835   4943     Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
  4836   4944   ){
  4837   4945     Fts5Config *pConfig = p->pConfig;
  4838   4946     sqlite3_stmt *pStmt = 0;
................................................................................
  4847   4955         pConfig->zDb, pConfig->zName, pSeg->iSegid
  4848   4956     ));
  4849   4957   
  4850   4958     /* Iterate through the b-tree hierarchy.  */
  4851   4959     while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
  4852   4960       i64 iRow;                     /* Rowid for this leaf */
  4853   4961       Fts5Data *pLeaf;              /* Data for this leaf */
  4854         -    int iOff;                     /* Offset of first term on leaf */
  4855   4962   
  4856   4963       int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
  4857   4964       const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1);
  4858   4965       int iIdxLeaf = sqlite3_column_int(pStmt, 2);
  4859   4966       int bIdxDlidx = sqlite3_column_int(pStmt, 3);
  4860   4967   
  4861   4968       /* If the leaf in question has already been trimmed from the segment, 
  4862   4969       ** ignore this b-tree entry. Otherwise, load it into memory. */
  4863   4970       if( iIdxLeaf<pSeg->pgnoFirst ) continue;
  4864         -    iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, iIdxLeaf);
         4971  +    iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
  4865   4972       pLeaf = fts5DataRead(p, iRow);
  4866   4973       if( pLeaf==0 ) break;
  4867   4974   
  4868   4975       /* Check that the leaf contains at least one term, and that it is equal
  4869   4976       ** to or larger than the split-key in zIdxTerm.  Also check that if there
  4870   4977       ** is also a rowid pointer within the leaf page header, it points to a
  4871   4978       ** location before the term.  */
  4872         -    iOff = fts5GetU16(&pLeaf->p[2]);
  4873         -    if( iOff==0 ){
         4979  +    if( pLeaf->nn<=pLeaf->szLeaf ){
  4874   4980         p->rc = FTS5_CORRUPT;
  4875   4981       }else{
  4876         -      int iRowidOff;
         4982  +      int iOff;                   /* Offset of first term on leaf */
         4983  +      int iRowidOff;              /* Offset of first rowid on leaf */
  4877   4984         int nTerm;                  /* Size of term on leaf in bytes */
  4878   4985         int res;                    /* Comparison of term and split-key */
  4879   4986   
  4880         -      iRowidOff = fts5GetU16(&pLeaf->p[0]);
         4987  +      iOff = fts5LeafFirstTermOff(pLeaf);
         4988  +      iRowidOff = fts5LeafFirstRowidOff(pLeaf);
  4881   4989         if( iRowidOff>=iOff ){
  4882   4990           p->rc = FTS5_CORRUPT;
  4883   4991         }else{
  4884   4992           iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
  4885   4993           res = memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
  4886   4994           if( res==0 ) res = nTerm - nIdxTerm;
  4887   4995           if( res<0 ) p->rc = FTS5_CORRUPT;
  4888   4996         }
         4997  +
         4998  +      fts5IntegrityCheckPgidx(p, pLeaf);
  4889   4999       }
  4890   5000       fts5DataRelease(pLeaf);
  4891   5001       if( p->rc ) break;
  4892   5002   
  4893   5003   
  4894   5004       /* Now check that the iter.nEmpty leaves following the current leaf
  4895   5005       ** (a) exist and (b) contain no terms. */
................................................................................
  4909   5019         for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
  4910   5020             fts5DlidxIterEof(p, pDlidx)==0;
  4911   5021             fts5DlidxIterNext(p, pDlidx)
  4912   5022         ){
  4913   5023   
  4914   5024           /* Check any rowid-less pages that occur before the current leaf. */
  4915   5025           for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
  4916         -          iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg);
         5026  +          iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
  4917   5027             pLeaf = fts5DataRead(p, iKey);
  4918   5028             if( pLeaf ){
  4919         -            if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT;
         5029  +            if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
  4920   5030               fts5DataRelease(pLeaf);
  4921   5031             }
  4922   5032           }
  4923   5033           iPrevLeaf = fts5DlidxIterPgno(pDlidx);
  4924   5034   
  4925   5035           /* Check that the leaf page indicated by the iterator really does
  4926   5036           ** contain the rowid suggested by the same. */
  4927         -        iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPrevLeaf);
         5037  +        iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
  4928   5038           pLeaf = fts5DataRead(p, iKey);
  4929   5039           if( pLeaf ){
  4930   5040             i64 iRowid;
  4931         -          int iRowidOff = fts5GetU16(&pLeaf->p[0]);
  4932         -          if( iRowidOff>=pLeaf->n ){
         5041  +          int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
         5042  +          ASSERT_SZLEAF_OK(pLeaf);
         5043  +          if( iRowidOff>=pLeaf->szLeaf ){
  4933   5044               p->rc = FTS5_CORRUPT;
  4934   5045             }else{
  4935   5046               fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
  4936   5047               if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
  4937   5048             }
  4938   5049             fts5DataRelease(pLeaf);
  4939   5050           }
................................................................................
  5126   5237     for(iLvl=0; iLvl<p->nLevel; iLvl++){
  5127   5238       Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
  5128   5239       sqlite3Fts5BufferAppendPrintf(pRc, pBuf, 
  5129   5240           " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
  5130   5241       );
  5131   5242       for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
  5132   5243         Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
  5133         -      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, 
  5134         -          " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight, 
  5135         -          pSeg->pgnoFirst, pSeg->pgnoLast
         5244  +      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}", 
         5245  +          pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
  5136   5246         );
  5137   5247       }
  5138   5248       sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
  5139   5249     }
  5140   5250   }
  5141   5251   
  5142   5252   /*
................................................................................
  5189   5299   **
  5190   5300   ** The return value is the number of bytes read from the input buffer.
  5191   5301   */
  5192   5302   static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
  5193   5303     i64 iDocid;
  5194   5304     int iOff = 0;
  5195   5305   
  5196         -  iOff = sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDocid);
  5197         -  sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
         5306  +  if( n>0 ){
         5307  +    iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
         5308  +    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
         5309  +  }
  5198   5310     while( iOff<n ){
  5199   5311       int nPos;
  5200   5312       int bDummy;
  5201   5313       iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy);
  5202   5314       iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
  5203   5315       if( iOff<n ){
  5204   5316         i64 iDelta;
  5205   5317         iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
  5206   5318         if( iDelta==0 ) return iOff;
  5207   5319         iDocid += iDelta;
  5208         -      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
         5320  +      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
  5209   5321       }
  5210   5322     }
  5211   5323   
  5212   5324     return iOff;
  5213   5325   }
  5214   5326   
  5215   5327   /*
................................................................................
  5227   5339     Fts5Buffer s;                   /* Build up text to return here */
  5228   5340     int rc = SQLITE_OK;             /* Return code */
  5229   5341     int nSpace = 0;
  5230   5342   
  5231   5343     assert( nArg==2 );
  5232   5344     memset(&s, 0, sizeof(Fts5Buffer));
  5233   5345     iRowid = sqlite3_value_int64(apVal[0]);
         5346  +
         5347  +  /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
         5348  +  ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
         5349  +  ** buffer overreads even if the record is corrupt.  */
  5234   5350     n = sqlite3_value_bytes(apVal[1]);
  5235   5351     aBlob = sqlite3_value_blob(apVal[1]);
  5236         -
  5237   5352     nSpace = n + FTS5_DATA_ZERO_PADDING;
  5238   5353     a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
  5239   5354     if( a==0 ) goto decode_out;
  5240   5355     memcpy(a, aBlob, n);
         5356  +
         5357  +
  5241   5358     fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);
  5242   5359   
  5243   5360     fts5DebugRowid(&rc, &s, iRowid);
  5244   5361     if( bDlidx ){
  5245   5362       Fts5Data dlidx;
  5246   5363       Fts5DlidxLvl lvl;
  5247   5364   
  5248   5365       dlidx.p = a;
  5249         -    dlidx.n = n;
         5366  +    dlidx.nn = n;
  5250   5367   
  5251   5368       memset(&lvl, 0, sizeof(Fts5DlidxLvl));
  5252   5369       lvl.pData = &dlidx;
  5253   5370       lvl.iLeafPgno = iPgno;
  5254   5371   
  5255   5372       for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
  5256   5373         sqlite3Fts5BufferAppendPrintf(&rc, &s, 
................................................................................
  5260   5377     }else if( iSegid==0 ){
  5261   5378       if( iRowid==FTS5_AVERAGES_ROWID ){
  5262   5379         /* todo */
  5263   5380       }else{
  5264   5381         fts5DecodeStructure(&rc, &s, a, n);
  5265   5382       }
  5266   5383     }else{
  5267         -    Fts5Buffer term;
         5384  +    Fts5Buffer term;              /* Current term read from page */
         5385  +    int szLeaf;                   /* Offset of pgidx in a[] */
         5386  +    int iPgidxOff;
         5387  +    int iPgidxPrev = 0;           /* Previous value read from pgidx */
  5268   5388       int iTermOff = 0;
  5269   5389       int iRowidOff = 0;
  5270   5390       int iOff;
  5271         -    int nKeep = 0;
         5391  +    int nDoclist;
  5272   5392   
  5273   5393       memset(&term, 0, sizeof(Fts5Buffer));
  5274   5394   
  5275         -    if( n>=4 ){
  5276         -      iRowidOff = fts5GetU16(&a[0]);
  5277         -      iTermOff = fts5GetU16(&a[2]);
  5278         -    }else{
         5395  +    if( n<4 ){
  5279   5396         sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt");
  5280   5397         goto decode_out;
         5398  +    }else{
         5399  +      iRowidOff = fts5GetU16(&a[0]);
         5400  +      iPgidxOff = szLeaf = fts5GetU16(&a[2]);
         5401  +      if( iPgidxOff<n ){
         5402  +        fts5GetVarint32(&a[iPgidxOff], iTermOff);
         5403  +      }
  5281   5404       }
  5282   5405   
  5283         -    if( iRowidOff ){
         5406  +    /* Decode the position list tail at the start of the page */
         5407  +    if( iRowidOff!=0 ){
  5284   5408         iOff = iRowidOff;
  5285         -    }else if( iTermOff ){
         5409  +    }else if( iTermOff!=0 ){
  5286   5410         iOff = iTermOff;
  5287   5411       }else{
  5288         -      iOff = n;
         5412  +      iOff = szLeaf;
  5289   5413       }
  5290   5414       fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
  5291   5415   
  5292         -    assert( iRowidOff==0 || iOff==iRowidOff );
  5293         -    if( iRowidOff ){
  5294         -      iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
  5295         -    }
         5416  +    /* Decode any more doclist data that appears on the page before the
         5417  +    ** first term. */
         5418  +    nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
         5419  +    fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
         5420  +
         5421  +    while( iPgidxOff<n ){
         5422  +      int bFirst = (iPgidxOff==szLeaf);     /* True for first term on page */
         5423  +      int nByte;                            /* Bytes of data */
         5424  +      int iEnd;
         5425  +      
         5426  +      iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
         5427  +      iPgidxPrev += nByte;
         5428  +      iOff = iPgidxPrev;
         5429  +
         5430  +      if( iPgidxOff<n ){
         5431  +        fts5GetVarint32(&a[iPgidxOff], nByte);
         5432  +        iEnd = iPgidxPrev + nByte;
         5433  +      }else{
         5434  +        iEnd = szLeaf;
         5435  +      }
  5296   5436   
  5297         -    assert( iTermOff==0 || iOff==iTermOff );
  5298         -    while( iOff<n ){
  5299         -      int nByte;
         5437  +      if( bFirst==0 ){
         5438  +        iOff += fts5GetVarint32(&a[iOff], nByte);
         5439  +        term.n = nByte;
         5440  +      }
  5300   5441         iOff += fts5GetVarint32(&a[iOff], nByte);
  5301         -      term.n= nKeep;
  5302   5442         fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
  5303   5443         iOff += nByte;
  5304   5444   
  5305   5445         sqlite3Fts5BufferAppendPrintf(
  5306   5446             &rc, &s, " term=%.*s", term.n, (const char*)term.p
  5307         -          );
  5308         -      iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
  5309         -      if( iOff<n ){
  5310         -        iOff += fts5GetVarint32(&a[iOff], nKeep);
  5311         -      }
         5447  +      );
         5448  +      iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
  5312   5449       }
         5450  +
  5313   5451       fts5BufferFree(&term);
  5314   5452     }
  5315   5453     
  5316   5454    decode_out:
  5317   5455     sqlite3_free(a);
  5318   5456     if( rc==SQLITE_OK ){
  5319   5457       sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
................................................................................
  5335   5473     if( nArg==0 ){
  5336   5474       sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
  5337   5475     }else{
  5338   5476       zArg = (const char*)sqlite3_value_text(apVal[0]);
  5339   5477       if( 0==sqlite3_stricmp(zArg, "segment") ){
  5340   5478         i64 iRowid;
  5341   5479         int segid, height, pgno;
  5342         -      if( nArg!=4 ){
         5480  +      if( nArg!=3 ){
  5343   5481           sqlite3_result_error(pCtx, 
  5344         -            "should be: fts5_rowid('segment', segid, height, pgno))", -1
         5482  +            "should be: fts5_rowid('segment', segid, pgno))", -1
  5345   5483           );
  5346   5484         }else{
  5347   5485           segid = sqlite3_value_int(apVal[1]);
  5348         -        height = sqlite3_value_int(apVal[2]);
  5349         -        pgno = sqlite3_value_int(apVal[3]);
  5350         -        iRowid = FTS5_SEGMENT_ROWID(segid, height, pgno);
         5486  +        pgno = sqlite3_value_int(apVal[2]);
         5487  +        iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
  5351   5488           sqlite3_result_int64(pCtx, iRowid);
  5352   5489         }
  5353         -    }else {
         5490  +    }else{
  5354   5491         sqlite3_result_error(pCtx, 
  5355         -        "first arg to fts5_rowid() must be 'segment' "
  5356         -        "or 'start-of-index'"
  5357         -        , -1
         5492  +        "first arg to fts5_rowid() must be 'segment'" , -1
  5358   5493         );
  5359   5494       }
  5360   5495     }
  5361   5496   }
  5362   5497   
  5363   5498   /*
  5364   5499   ** This is called as part of registering the FTS5 module with database

Changes to ext/fts5/fts5_main.c.

  1313   1313     }else if( 0==sqlite3_stricmp("optimize", z) ){
  1314   1314       rc = sqlite3Fts5StorageOptimize(pTab->pStorage);
  1315   1315     }else if( 0==sqlite3_stricmp("merge", z) ){
  1316   1316       int nMerge = sqlite3_value_int(pVal);
  1317   1317       rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge);
  1318   1318     }else if( 0==sqlite3_stricmp("integrity-check", z) ){
  1319   1319       rc = sqlite3Fts5StorageIntegrity(pTab->pStorage);
         1320  +#ifdef SQLITE_DEBUG
         1321  +  }else if( 0==sqlite3_stricmp("prefix-index", z) ){
         1322  +    pConfig->bPrefixIndex = sqlite3_value_int(pVal);
         1323  +#endif
  1320   1324     }else{
  1321   1325       rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex);
  1322   1326       if( rc==SQLITE_OK ){
  1323   1327         rc = sqlite3Fts5ConfigSetValue(pTab->pConfig, z, pVal, &bError);
  1324   1328       }
  1325   1329       if( rc==SQLITE_OK ){
  1326   1330         if( bError ){

Changes to ext/fts5/test/fts5aa.test.

    47     47   }
    48     48   do_execsql_test 2.1 {
    49     49     INSERT INTO t1 VALUES('a b c', 'd e f');
    50     50   }
    51     51   
    52     52   do_test 2.2 {
    53     53     execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }
    54         -} {/{{structure} {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* h=0 leaves=1..1}}}/}
           54  +} {/{{structure} {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* leaves=1..1}}}/}
    55     55   
    56     56   foreach w {a b c d e f} {
    57     57     do_execsql_test 2.3.$w.asc {
    58     58       SELECT rowid FROM t1 WHERE t1 MATCH $w;
    59     59     } {1}
    60     60     do_execsql_test 2.3.$w.desc {
    61     61       SELECT rowid FROM t1 WHERE t1 MATCH $w ORDER BY rowid DESC;
................................................................................
   135    135     do_execsql_test 5.$i.1 { INSERT INTO t1 VALUES($x, $y) }
   136    136     do_execsql_test 5.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
   137    137     if {[set_test_counter errors]} break
   138    138   }
   139    139   
   140    140   #-------------------------------------------------------------------------
   141    141   #
   142         -breakpoint
   143    142   reset_db
   144    143   do_execsql_test 6.0 {
   145    144     CREATE VIRTUAL TABLE t1 USING fts5(x,y);
   146    145     INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
   147    146   }
   148    147   
   149    148   do_execsql_test 6.1 {
................................................................................
   197    196         set y [doc]
   198    197         set z [doc]
   199    198         set rowid [expr int(rand() * 100)]
   200    199         execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
   201    200       }
   202    201       execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
   203    202     } {}
          203  +  if {[set_test_counter errors]} break
   204    204   }
   205    205   
   206    206   #-------------------------------------------------------------------------
   207    207   #
   208    208   reset_db
   209    209   do_execsql_test 8.0 {
   210    210     CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3");

Changes to ext/fts5/test/fts5ad.test.

   201    201           }
   202    202         }
   203    203         if {$bMatch} { lappend ret $rowid }
   204    204       }
   205    205       return $ret
   206    206     }
   207    207   
          208  +  do_execsql_test $T.integrity {
          209  +    INSERT INTO t1(t1) VALUES('integrity-check');
          210  +  }
   208    211     
   209    212     foreach {bAsc sql} {
   210    213       1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix}
   211    214       0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC}
   212    215     } {
   213    216       foreach {tn prefix} {
   214    217         1  {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*} 

Changes to ext/fts5/test/fts5al.test.

    22     22     finish_test
    23     23     return
    24     24   }
    25     25   
    26     26   do_execsql_test 1.1 {
    27     27     CREATE VIRTUAL TABLE ft1 USING fts5(x);
    28     28     SELECT * FROM ft1_config;
    29         -} {version 3}
           29  +} {version 4}
    30     30   
    31     31   do_execsql_test 1.2 {
    32     32     INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32);
    33     33     SELECT * FROM ft1_config;
    34         -} {pgsz 32 version 3}
           34  +} {pgsz 32 version 4}
    35     35   
    36     36   do_execsql_test 1.3 {
    37     37     INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64);
    38     38     SELECT * FROM ft1_config;
    39         -} {pgsz 64 version 3}
           39  +} {pgsz 64 version 4}
    40     40   
    41     41   #--------------------------------------------------------------------------
    42     42   # Test the logic for parsing the rank() function definition.
    43     43   #
    44     44   foreach {tn defn} {
    45     45     1 "fname()"
    46     46     2 "fname(1)"

Changes to ext/fts5/test/fts5corrupt.test.

    39     39   db_save
    40     40   
    41     41   do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
    42     42   set segid [lindex [fts5_level_segids t1] 0]
    43     43   
    44     44   do_test 1.3 {
    45     45     execsql {
    46         -    DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', $segid, 0, 4);
           46  +    DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', $segid, 4);
    47     47     }
    48     48     catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
    49     49   } {1 {database disk image is malformed}}
    50     50   
    51     51   do_test 1.4 {
    52     52     db_restore_and_reopen
    53     53     execsql {
    54     54       UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE
    55         -    rowid = fts5_rowid('segment', $segid, 0, 4);
           55  +    rowid = fts5_rowid('segment', $segid, 4);
    56     56     }
    57     57     catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
    58     58   } {1 {database disk image is malformed}}
    59     59   
    60     60   db_restore_and_reopen
    61     61   #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
    62     62   

Changes to ext/fts5/test/fts5corrupt2.test.

   205    205         if {$res == "1 {database disk image is malformed}"} {incr nCorrupt}
   206    206         set {} 1
   207    207       } {1}
   208    208   
   209    209       execsql ROLLBACK
   210    210     }
   211    211   
   212         -  do_test 4.$tn.x { expr $nCorrupt>0 } 1
          212  +  # do_test 4.$tn.x { expr $nCorrupt>0 } 1
   213    213   }
   214    214   
   215    215   }
   216    216   
   217    217   set doc [string repeat "A B C " 1000]
   218         -do_execsql_test 4.0 {
          218  +do_execsql_test 5.0 {
   219    219     CREATE VIRTUAL TABLE x5 USING fts5(tt);
   220    220     INSERT INTO x5(x5, rank) VALUES('pgsz', 32);
   221    221     WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10) 
   222    222     INSERT INTO x5 SELECT $doc FROM ii;
   223    223   }
   224    224   
   225    225   foreach {tn hdr} {
................................................................................
   226    226     1 "\x00\x01"
   227    227   } {
   228    228     set tn2 0
   229    229     set nCorrupt 0
   230    230     foreach rowid [db eval {SELECT rowid FROM x5_data WHERE rowid>10}] {
   231    231       if {$rowid & $mask} continue
   232    232       incr tn2
   233         -    do_test 4.$tn.$tn2 {
          233  +    do_test 5.$tn.$tn2 {
   234    234         execsql BEGIN
   235    235   
   236    236         set fd [db incrblob main x5_data block $rowid]
   237    237         fconfigure $fd -encoding binary -translation binary
   238    238         puts -nonewline $fd $hdr
   239    239         close $fd
   240    240   
................................................................................
   244    244   
   245    245       execsql ROLLBACK
   246    246     }
   247    247   }
   248    248   
   249    249   #--------------------------------------------------------------------
   250    250   reset_db
   251         -do_execsql_test 5.1 {
          251  +do_execsql_test 6.1 {
   252    252     CREATE VIRTUAL TABLE x5 USING fts5(tt);
   253    253     INSERT INTO x5 VALUES('a');
   254    254     INSERT INTO x5 VALUES('a a');
   255    255     INSERT INTO x5 VALUES('a a a');
   256    256     INSERT INTO x5 VALUES('a a a a');
   257    257   
   258    258     UPDATE x5_docsize SET sz = X'' WHERE id=3;
   259    259   }
   260    260   proc colsize {cmd i} { 
   261    261     $cmd xColumnSize $i
   262    262   }
   263    263   sqlite3_fts5_create_function db colsize colsize
   264    264   
   265         -do_catchsql_test 5.2 {
          265  +do_catchsql_test 6.2 {
   266    266     SELECT colsize(x5, 0) FROM x5 WHERE x5 MATCH 'a'
   267    267   } {1 SQLITE_CORRUPT_VTAB}
   268    268   
   269    269   
   270    270   sqlite3_fts5_may_be_corrupt 0
   271    271   finish_test
   272    272   

Changes to ext/fts5/test/fts5rowid.test.

    23     23   
    24     24   do_catchsql_test 1.1 {
    25     25     SELECT fts5_rowid()
    26     26   } {1 {should be: fts5_rowid(subject, ....)}}
    27     27   
    28     28   do_catchsql_test 1.2 {
    29     29     SELECT fts5_rowid('segment')
    30         -} {1 {should be: fts5_rowid('segment', segid, height, pgno))}}
           30  +} {1 {should be: fts5_rowid('segment', segid, pgno))}}
    31     31   
    32     32   do_execsql_test 1.3 {
    33         -  SELECT fts5_rowid('segment', 1, 1, 1)
    34         -} {139586437121}
           33  +  SELECT fts5_rowid('segment', 1, 1)
           34  +} {137438953473}
    35     35   
    36     36   do_catchsql_test 1.4 {
    37     37     SELECT fts5_rowid('nosucharg');
    38         -} {1 {first arg to fts5_rowid() must be 'segment' or 'start-of-index'}} 
           38  +} {1 {first arg to fts5_rowid() must be 'segment'}} 
    39     39   
    40     40   
    41     41   #-------------------------------------------------------------------------
    42     42   # Tests of the fts5_decode() function.
    43     43   #
    44     44   reset_db
    45     45   do_execsql_test 2.1 { 

Added ext/fts5/test/fts5simple.test.

            1  +# 2015 September 05
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#*************************************************************************
           11  +#
           12  +
           13  +source [file join [file dirname [info script]] fts5_common.tcl]
           14  +set testprefix fts5simple
           15  +
           16  +# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
           17  +ifcapable !fts5 {
           18  +  finish_test
           19  +  return
           20  +}
           21  +
           22  +if 1 {
           23  +#-------------------------------------------------------------------------
           24  +#
           25  +set doc "x x [string repeat {y } 50]z z"
           26  +do_execsql_test 1.0 {
           27  +  CREATE VIRTUAL TABLE t1 USING fts5(x);
           28  +  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
           29  +  BEGIN;
           30  +    INSERT INTO t1 VALUES($doc);
           31  +  COMMIT;
           32  +}
           33  +
           34  +do_execsql_test 1.1 {
           35  +  INSERT INTO t1(t1) VALUES('integrity-check');
           36  +}
           37  +
           38  +#-------------------------------------------------------------------------
           39  +#
           40  +reset_db
           41  +do_execsql_test 2.0 {
           42  +  CREATE VIRTUAL TABLE t1 USING fts5(x);
           43  +  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
           44  +  INSERT INTO t1 VALUES('a b c');
           45  +  INSERT INTO t1 VALUES('d e f');
           46  +  INSERT INTO t1(t1) VALUES('optimize');
           47  +}
           48  +
           49  +do_execsql_test 2.1 {
           50  +  INSERT INTO t1(t1) VALUES('integrity-check');
           51  +} {}
           52  +
           53  +
           54  +#-------------------------------------------------------------------------
           55  +#
           56  +reset_db
           57  +do_execsql_test 3.0 {
           58  +  CREATE VIRTUAL TABLE t1 USING fts5(x, prefix='1,2');
           59  +  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
           60  +  BEGIN;
           61  +  INSERT INTO t1 VALUES('one');
           62  +  SELECT * FROM t1 WHERE t1 MATCH 'o*';
           63  +} {one}
           64  +
           65  +do_execsql_test 3.1 {
           66  +  INSERT INTO t1(t1) VALUES('integrity-check');
           67  +} {}
           68  +
           69  +#-------------------------------------------------------------------------
           70  +reset_db
           71  +do_execsql_test 4.1 {
           72  +  CREATE VIRTUAL TABLE t11 USING fts5(content);
           73  +  INSERT INTO t11(t11, rank) VALUES('pgsz', 32);
           74  +  INSERT INTO t11 VALUES('another');
           75  +  INSERT INTO t11 VALUES('string');
           76  +  INSERT INTO t11 VALUES('of');
           77  +  INSERT INTO t11 VALUES('text');
           78  +}
           79  +do_test 4.2 {
           80  +  execsql { INSERT INTO t11(t11) VALUES('optimize') }
           81  +} {}
           82  +do_execsql_test 4.3 {
           83  +  INSERT INTO t11(t11) VALUES('integrity-check');
           84  +} {}
           85  +
           86  +#db eval { SELECT fts5_decode(rowid, block) as x FROM t11_data } { puts $x }
           87  +
           88  +#-------------------------------------------------------------------------
           89  +reset_db
           90  +set doc [string repeat "x y " 5]
           91  +do_execsql_test 5.1 {
           92  +  CREATE VIRTUAL TABLE yy USING fts5(content);
           93  +  INSERT INTO yy(yy, rank) VALUES('pgsz', 32);
           94  +  BEGIN;
           95  +    INSERT INTO yy VALUES($doc);
           96  +    INSERT INTO yy VALUES($doc);
           97  +    INSERT INTO yy VALUES($doc);
           98  +    INSERT INTO yy VALUES($doc);
           99  +    INSERT INTO yy VALUES($doc);
          100  +    INSERT INTO yy VALUES($doc);
          101  +    INSERT INTO yy VALUES($doc);
          102  +    INSERT INTO yy VALUES($doc);
          103  +  COMMIT;
          104  +}
          105  +
          106  +do_execsql_test 5.2 {
          107  +  SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid ASC
          108  +} {1 2 3 4 5 6 7 8}
          109  +
          110  +do_execsql_test 5.3 {
          111  +  SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid DESC
          112  +} {8 7 6 5 4 3 2 1}
          113  +
          114  +#db eval { SELECT fts5_decode(rowid, block) as x FROM yy_data } { puts $x }
          115  +
          116  +#-------------------------------------------------------------------------
          117  +reset_db
          118  +do_execsql_test 5.1 {
          119  +  CREATE VIRTUAL TABLE tt USING fts5(content);
          120  +  INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
          121  +  INSERT INTO tt VALUES('aa');
          122  +}
          123  +
          124  +do_execsql_test 5.2 {
          125  +  SELECT rowid FROM tt WHERE tt MATCH 'a*';
          126  +} {1}
          127  +
          128  +do_execsql_test 5.3 {
          129  +  DELETE FROM tt;
          130  +  BEGIN;
          131  +    INSERT INTO tt VALUES('aa');
          132  +    INSERT INTO tt VALUES('ab');
          133  +  COMMIT;
          134  +} {}
          135  +
          136  +do_execsql_test 5.4 {
          137  +  SELECT rowid FROM tt WHERE tt MATCH 'a*';
          138  +} {1 2}
          139  +
          140  +}
          141  +
          142  +do_execsql_test 5.5 {
          143  +  DELETE FROM tt;
          144  +  BEGIN;
          145  +    INSERT INTO tt VALUES('aa');
          146  +    INSERT INTO tt VALUES('ab');
          147  +    INSERT INTO tt VALUES('aa');
          148  +    INSERT INTO tt VALUES('ab');
          149  +    INSERT INTO tt VALUES('aa');
          150  +    INSERT INTO tt VALUES('ab');
          151  +    INSERT INTO tt VALUES('aa');
          152  +    INSERT INTO tt VALUES('ab');
          153  +  COMMIT;
          154  +  SELECT rowid FROM tt WHERE tt MATCH 'a*';
          155  +} {1 2 3 4 5 6 7 8}
          156  +
          157  +do_execsql_test 5.6 {
          158  +  INSERT INTO tt(tt) VALUES('integrity-check');
          159  +}
          160  +
          161  +reset_db
          162  +do_execsql_test 5.7 {
          163  +  CREATE VIRTUAL TABLE tt USING fts5(content);
          164  +  INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
          165  +  INSERT INTO tt VALUES('aa ab ac ad ae af');
          166  +}
          167  +
          168  +do_execsql_test 5.8 {
          169  +  SELECT rowid FROM tt WHERE tt MATCH 'a*';
          170  +} {1}
          171  +
          172  +finish_test
          173  +

Changes to ext/fts5/test/fts5version.test.

    26     26   do_execsql_test 1.1 {
    27     27     CREATE VIRTUAL TABLE t1 USING fts5(one);
    28     28     INSERT INTO t1 VALUES('a b c d');
    29     29   } {}
    30     30   
    31     31   do_execsql_test 1.2 {
    32     32     SELECT * FROM t1_config WHERE k='version'
    33         -} {version 3}
           33  +} {version 4}
    34     34   
    35     35   do_execsql_test 1.3 {
    36     36     SELECT rowid FROM t1 WHERE t1 MATCH 'a';
    37     37   } {1}
    38     38   
    39     39   do_execsql_test 1.4 {
    40         -  UPDATE t1_config set v=4 WHERE k='version';
           40  +  UPDATE t1_config set v=5 WHERE k='version';
    41     41   } 
    42     42   
    43     43   do_test 1.5 {
    44     44     db close
    45     45     sqlite3 db test.db
    46     46     catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
    47         -} {1 {invalid fts5 file format (found 4, expected 3) - run 'rebuild'}}
           47  +} {1 {invalid fts5 file format (found 5, expected 4) - run 'rebuild'}}
    48     48   
    49     49   do_test 1.6 {
    50     50     db close
    51     51     sqlite3 db test.db
    52     52     catchsql { INSERT INTO t1 VALUES('x y z') }
    53         -} {1 {invalid fts5 file format (found 4, expected 3) - run 'rebuild'}}
           53  +} {1 {invalid fts5 file format (found 5, expected 4) - run 'rebuild'}}
    54     54   
    55     55   do_test 1.7 {
    56     56     execsql { DELETE FROM t1_config WHERE k='version' }
    57     57     db close
    58     58     sqlite3 db test.db
    59     59     catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
    60         -} {1 {invalid fts5 file format (found 0, expected 3) - run 'rebuild'}}
           60  +} {1 {invalid fts5 file format (found 0, expected 4) - run 'rebuild'}}
    61     61   
    62     62   
    63     63   finish_test
    64     64   

Changes to ext/fts5/tool/loadfts5.tcl.

    14     14     foreach f [glob -nocomplain -dir $dir *] {
    15     15       if {$::O(limit) && $::nRow>=$::O(limit)} break
    16     16       if {[file isdir $f]} {
    17     17         load_hierachy $f
    18     18       } else {
    19     19         db eval { INSERT INTO t1 VALUES($f, loadfile($f)) }
    20     20         incr ::nRow
           21  +
           22  +      if {$::O(trans) && ($::nRow % $::O(trans))==0} {
           23  +        db eval { COMMIT }
           24  +        db eval { INSERT INTO t1(t1) VALUES('integrity-check') }
           25  +        db eval { BEGIN }
           26  +      }
    21     27   
    22     28         if {($::nRow % $::nRowPerDot)==0} {
    23     29           puts -nonewline .
    24     30           if {($::nRow % (65*$::nRowPerDot))==0} { puts "" }
    25     31           flush stdout
    26     32         }
    27     33   
................................................................................
    37     43     puts stderr "  -fts5        (use fts5)"
    38     44     puts stderr "  -porter      (use porter tokenizer)"
    39     45     puts stderr "  -delete      (delete the database file before starting)"
    40     46     puts stderr "  -limit N     (load no more than N documents)"
    41     47     puts stderr "  -automerge N (set the automerge parameter to N)"
    42     48     puts stderr "  -crisismerge N (set the crisismerge parameter to N)"
    43     49     puts stderr "  -prefix PREFIX (comma separated prefix= argument)"
           50  +  puts stderr "  -trans N     (commit after N inserts - 0 == never)"
    44     51     exit 1
    45     52   }
    46     53   
    47     54   set O(vtab)       fts5
    48     55   set O(tok)        ""
    49     56   set O(limit)      0
    50     57   set O(delete)     0
    51     58   set O(automerge)  -1
    52     59   set O(crisismerge)  -1
    53     60   set O(prefix)     ""
           61  +set O(trans)      0
    54     62   
    55     63   if {[llength $argv]<2} usage
    56     64   set nOpt [expr {[llength $argv]-2}]
    57     65   for {set i 0} {$i < $nOpt} {incr i} {
    58     66     set arg [lindex $argv $i]
    59     67     switch -- [lindex $argv $i] {
    60     68       -fts4 {
................................................................................
    73     81         set O(delete) 1
    74     82       }
    75     83   
    76     84       -limit {
    77     85         if { [incr i]>=$nOpt } usage
    78     86         set O(limit) [lindex $argv $i]
    79     87       }
           88  +
           89  +    -trans {
           90  +      if { [incr i]>=$nOpt } usage
           91  +      set O(trans) [lindex $argv $i]
           92  +    }
    80     93       
    81     94       -automerge {
    82     95         if { [incr i]>=$nOpt } usage
    83     96         set O(automerge) [lindex $argv $i]
    84     97       }
    85     98   
    86     99       -crisismerge {
................................................................................
   100    113   }
   101    114   
   102    115   set dbfile [lindex $argv end-1]
   103    116   if {$O(delete)} { file delete -force $dbfile }
   104    117   sqlite3 db $dbfile
   105    118   catch { load_static_extension db fts5 }
   106    119   db func loadfile loadfile
          120  +db eval "PRAGMA page_size=4096"
   107    121   
   108         -db transaction {
          122  +db eval BEGIN
   109    123     set pref ""
   110    124     if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
   111    125     catch {
   112    126       db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
   113    127       db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
   114    128     }
   115    129     if {$O(automerge)>=0} {
................................................................................
   122    136     if {$O(crisismerge)>=0} {
   123    137       if {$O(vtab) == "fts5"} {
   124    138         db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))}
   125    139       } else {
   126    140       }
   127    141     }
   128    142     load_hierachy [lindex $argv end]
   129         -}
          143  +db eval COMMIT
   130    144   
   131    145   
   132    146