/ Check-in [ba359d78]
Login
Overview
Comment:Add "segment promotion" to fts5. This prevents the FTS index from growing indefinitely as data is added and deleted.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1:ba359d78e166d78e0dc89e3c63a9a41e9ffea989
User & Date: dan 2014-08-07 18:47:33
Context
2014-08-09
18:02
Use multiple memory allocations for a single Fts5Structure object. This is probably less efficient but much easier to get right. check-in: 2821825f user: dan tags: fts5
2014-08-07
18:47
Add "segment promotion" to fts5. This prevents the FTS index from growing indefinitely as data is added and deleted. check-in: ba359d78 user: dan tags: fts5
2014-08-06
20:04
Avoid writing delete markers to the oldest segment in an FTS index. check-in: 1baeb1ce user: dan tags: fts5
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_index.c.

651
652
653
654
655
656
657
























658
659
660
661
662
663
664
....
1075
1076
1077
1078
1079
1080
1081















































































































































































1082
1083
1084
1085
1086
1087
1088
....
3147
3148
3149
3150
3151
3152
3153

3154
3155
3156
3157
3158
3159
3160
....
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
  }else{
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(idx=%d segid=%d h=%d pgno=%d)",
        iIdx, iSegid, iHeight, iPgno
    );
  }
}


























static void fts5PutU16(u8 *aOut, u16 iVal){
  aOut[0] = (iVal>>8);
  aOut[1] = (iVal&0xFF);
}

static u16 fts5GetU16(const u8 *aIn){
................................................................................
      fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
    }
  }

  fts5DataWrite(p, FTS5_STRUCTURE_ROWID(iIdx), buf.p, buf.n);
  fts5BufferFree(&buf);
}

















































































































































































/*
** If the pIter->iOff offset currently points to an entry indicating one
** or more term-less nodes, advance past it and set pIter->nEmpty to
** the number of empty child nodes.
*/
................................................................................
    for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
      assert( pStruct->aLevel[iLvl].nSeg==0 );
    }
#endif

    if( nBest<p->nMinMerge && pStruct->aLevel[iBestLvl].nMerge==0 ) break;
    fts5IndexMergeLevel(p, iIdx, pStruct, iBestLvl, &nRem);

    assert( nRem==0 || p->rc==SQLITE_OK );
  }
}

/*
** Flush the contents of in-memory hash table iHash to a new level-0 
** segment on disk. Also update the corresponding structure record.
................................................................................
*/
static void fts5DecodeStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  const u8 *pBlob, int nBlob
){
  int rc;                         /* Return code */
  int iLvl, iSeg;                 /* Iterate through levels, segments */
  Fts5Structure *p = 0;           /* Decoded structure object */

  rc = fts5StructureDecode(pBlob, nBlob, &p);
  if( rc!=SQLITE_OK ){
    *pRc = rc;
    return;
  }

  for(iLvl=0; iLvl<p->nLevel; iLvl++){
    Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, 
        " {lvl=%d nMerge=%d", iLvl, pLvl->nMerge
    );
    for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
      Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, 
          " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight, 
          pSeg->pgnoFirst, pSeg->pgnoLast
      );
    }
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
  }

  fts5StructureRelease(p);
}

/*
** Buffer (a/n) is assumed to contain a list of serialized varints. Read
** each varint and append its string representation to buffer pBuf. Return
** after either the input buffer is exhausted or a 0 value is read.







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>







 







<








|
<
<
<
<
<
<
<
<
<
<
<
<
<
<







651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
....
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
....
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
....
3885
3886
3887
3888
3889
3890
3891

3892
3893
3894
3895
3896
3897
3898
3899
3900














3901
3902
3903
3904
3905
3906
3907
  }else{
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(idx=%d segid=%d h=%d pgno=%d)",
        iIdx, iSegid, iHeight, iPgno
    );
  }
}

static void fts5DebugStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  Fts5Structure *p
){
  int iLvl, iSeg;                 /* Iterate through levels, segments */

  for(iLvl=0; iLvl<p->nLevel; iLvl++){
    Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, 
        " {lvl=%d nMerge=%d", iLvl, pLvl->nMerge
    );
    for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
      Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, 
          " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight, 
          pSeg->pgnoFirst, pSeg->pgnoLast
      );
    }
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
  }
}



static void fts5PutU16(u8 *aOut, u16 iVal){
  aOut[0] = (iVal>>8);
  aOut[1] = (iVal&0xFF);
}

static u16 fts5GetU16(const u8 *aIn){
................................................................................
      fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
    }
  }

  fts5DataWrite(p, FTS5_STRUCTURE_ROWID(iIdx), buf.p, buf.n);
  fts5BufferFree(&buf);
}

#if 0
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
  int rc = SQLITE_OK;
  Fts5Buffer buf;
  memset(&buf, 0, sizeof(buf));
  fts5DebugStructure(&rc, &buf, pStruct);
  fprintf(stdout, "%s: %s\n", zCaption, buf.p);
  fflush(stdout);
  fts5BufferFree(&buf);
}
#else
# define fts5PrintStructure(x,y)
#endif

/*
** Return a copy of index structure pStruct. Except, promote as many segments
** as possible to level iPromote. If an OOM occurs, NULL is returned.
*/
static void fts5StructurePromoteTo(
  Fts5Index *p,
  int iPromote,
  int szPromote,
  Fts5Structure *pStruct
){
  Fts5Structure *pNew;
  u8 *pSpace;
  int nSeg = fts5StructureCountSegments(pStruct);
  int nLvl = pStruct->nLevel;
  int nByte = (
      sizeof(Fts5Structure) + 
      sizeof(Fts5StructureLevel) * (nLvl+1) +
      sizeof(Fts5StructureSegment) * (nSeg+nLvl+1)
  );
  int iTst;

  pNew = fts5IdxMalloc(p, nByte);
  if( !pNew ) return;
  pNew->nWriteCounter = pStruct->nWriteCounter;
  pNew->nLevel = pStruct->nLevel;
  pSpace = (u8*)&pNew->aLevel[nLvl+1];

  for(iTst=0; iTst<nLvl; iTst++){
    int nCopy;
    Fts5StructureLevel *pLvlOut = &pNew->aLevel[iTst];
    pLvlOut->aSeg = (Fts5StructureSegment*)pSpace;

    if( iTst==iPromote ){
      int il, is;
      int nSegCopy = 0;

      /* Figure out the number of segments that will be promoted. */
      for(il=iTst+1; il<pStruct->nLevel; il++){
        Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
        if( pLvl->nMerge ) break;
        for(is=pLvl->nSeg-1; is>=0; is--){
          Fts5StructureSegment *pSeg = &pLvl->aSeg[is];
          int sz = pSeg->pgnoLast - pSeg->pgnoFirst + 1;
          if( sz>szPromote ){
            il = pStruct->nLevel;
            break;
          }
          nSegCopy++;
        }
      }
      assert( nSegCopy>0 );
      pSpace += (nSegCopy * sizeof(Fts5StructureSegment));
      pLvlOut->nSeg = nSegCopy;

      for(il=iTst+1; il<pStruct->nLevel && nSegCopy>0; il++){
        Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
        for(is=pLvl->nSeg-1; is>=0 && nSegCopy>0; is--){
          Fts5StructureSegment *pSeg = &pLvl->aSeg[is];
          nSegCopy--;
          memcpy(&pLvlOut->aSeg[nSegCopy], pSeg, sizeof(Fts5StructureSegment));
          pLvl->nSeg--;
        }
      }
      assert( nSegCopy==0 );
    }

    nCopy = pStruct->aLevel[iTst].nSeg * sizeof(Fts5StructureSegment);
    if( nCopy ) memcpy(pSpace, pStruct->aLevel[iTst].aSeg, nCopy);
    pSpace += (nCopy + sizeof(Fts5StructureSegment));
    pLvlOut->nSeg += pStruct->aLevel[iTst].nSeg;
  }

  fts5PrintStructure("NEW", pNew);
  memcpy(pStruct, pNew, nByte);
  for(iTst=0; iTst<pNew->nLevel; iTst++){
    int iOff = pNew->aLevel[iTst].aSeg - (Fts5StructureSegment*)pNew;
    pStruct->aLevel[iTst].aSeg = &((Fts5StructureSegment*)pStruct)[iOff];
  }
  sqlite3_free(pNew);
}

/*
** A new segment has just been written to level iLvl of index structure
** pStruct. This function determines if any segments should be promoted
** as a result. Segments are promoted in two scenarios:
**
**   a) If the segment just written is smaller than one or more segments
**      within the previous populated level, it is promoted to the previous
**      populated level.
**
**   b) If the segment just written is larger than the newest segment on
**      the next populated level, then that segment, and any other adjacent
**      segments that are also smaller than the one just written, are 
**      promoted. 
**
** If one or more segments are promoted, the structure object is updated
** to reflect this.
*/
static void fts5StructurePromote(
  Fts5Index *p,                   /* FTS5 backend object */
  int iLvl,                       /* Index level just updated */
  Fts5Structure *pStruct          /* Index structure */
){
  if( p->rc==SQLITE_OK ){
    int iTst;
    int iPromote = -1;
    int szPromote;                /* Promote anything this size or smaller */
    Fts5StructureSegment *pSeg;   /* Segment just written */
    Fts5StructureLevel *pTst;
    int szSeg;                    /* Size of segment just written */


    pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
    szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);

    /* Check for condition (a) */
    for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
    pTst = &pStruct->aLevel[iTst];
    if( iTst>=0 && pTst->nMerge==0 ){
      int i;
      int szMax = 0;
      for(i=0; i<pTst->nSeg; i++){
        int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
        if( sz>szMax ) szMax = sz;
      }
      if( szMax>=szSeg ){
        /* Condition (a) is true. Promote the newest segment on level 
        ** iLvl to level iTst.  */
        iPromote = iTst;
        szPromote = szMax;
      }
    }

    /* Check for condition (b) */
    if( iPromote<0 ){
      Fts5StructureLevel *pTst;
      for(iTst=iLvl+1; iTst<pStruct->nLevel; iTst++){
        pTst = &pStruct->aLevel[iTst];
        if( pTst->nSeg ) break;
      }
      if( iTst<pStruct->nLevel && pTst->nMerge==0 ){
        Fts5StructureSegment *pSeg2 = &pTst->aSeg[pTst->nSeg-1];
        int sz = pSeg2->pgnoLast - pSeg2->pgnoFirst + 1;
        if( sz<=szSeg ){
          iPromote = iLvl;
          szPromote = szSeg;
        }
      }
    }

    /* If iPromote is greater than or equal to zero at this point, then it
    ** is the level number of a level to which segments that consist of
    ** szPromote or fewer pages should be promoted. */ 
    if( iPromote>=0 ){
      fts5PrintStructure("BEFORE", pStruct);
      fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
      fts5PrintStructure("AFTER", pStruct);
    }
  }
}


/*
** If the pIter->iOff offset currently points to an entry indicating one
** or more term-less nodes, advance past it and set pIter->nEmpty to
** the number of empty child nodes.
*/
................................................................................
    for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
      assert( pStruct->aLevel[iLvl].nSeg==0 );
    }
#endif

    if( nBest<p->nMinMerge && pStruct->aLevel[iBestLvl].nMerge==0 ) break;
    fts5IndexMergeLevel(p, iIdx, pStruct, iBestLvl, &nRem);
    fts5StructurePromote(p, iBestLvl+1, pStruct);
    assert( nRem==0 || p->rc==SQLITE_OK );
  }
}

/*
** Flush the contents of in-memory hash table iHash to a new level-0 
** segment on disk. Also update the corresponding structure record.
................................................................................
*/
static void fts5DecodeStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  const u8 *pBlob, int nBlob
){
  int rc;                         /* Return code */

  Fts5Structure *p = 0;           /* Decoded structure object */

  rc = fts5StructureDecode(pBlob, nBlob, &p);
  if( rc!=SQLITE_OK ){
    *pRc = rc;
    return;
  }

  fts5DebugStructure(pRc, pBuf, p);














  fts5StructureRelease(p);
}

/*
** Buffer (a/n) is assumed to contain a list of serialized varints. Read
** each varint and append its string representation to buffer pBuf. Return
** after either the input buffer is exhausted or a 0 value is read.

Changes to test/fts5aj.test.

53
54
55
56
57
58
59
60


61
62
63
64
65
66
67
68
69
70
71
  if {$iTest > 1000} { execsql { DELETE FROM t1 WHERE rowid=($iTest-1000) } }
  set new [doc]
  execsql { INSERT INTO t1 VALUES($new) }
  if {$iTest==10000} { set sz1 [db one {SELECT count(*) FROM t1_data}] }
  if {0==($iTest % 1000)} {
    set sz [db one {SELECT count(*) FROM t1_data}]
    set s [structure]
    do_test 1.$iTest.$sz.{$s} {} {}


  }
}

#db eval { SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}

do_execsql_test 2.0 { INSERT INTO t1(t1) VALUES('integrity-check') }



finish_test








|
>
>



<
<

<




53
54
55
56
57
58
59
60
61
62
63
64
65


66

67
68
69
70
  if {$iTest > 1000} { execsql { DELETE FROM t1 WHERE rowid=($iTest-1000) } }
  set new [doc]
  execsql { INSERT INTO t1 VALUES($new) }
  if {$iTest==10000} { set sz1 [db one {SELECT count(*) FROM t1_data}] }
  if {0==($iTest % 1000)} {
    set sz [db one {SELECT count(*) FROM t1_data}]
    set s [structure]
    do_execsql_test 1.$iTest.$sz.{$s} {
      INSERT INTO t1(t1) VALUES('integrity-check') 
    }
  }
}



do_execsql_test 2.0 { INSERT INTO t1(t1) VALUES('integrity-check') }



finish_test