/ Check-in [ce972f6a]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:When scanning the full-text index as part of the fts5 integrity-check, also run a point query for every term and verify that these results are consistent with those found by the linear scan.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: ce972f6aab90f6929d018696f1ab3c2649eca802
User & Date: dan 2015-03-21 15:37:19
Context
2015-03-21
15:45
Merge trunk changes with this branch. check-in: 14274391 user: dan tags: fts5
15:37
When scanning the full-text index as part of the fts5 integrity-check, also run a point query for every term and verify that these results are consistent with those found by the linear scan. check-in: ce972f6a user: dan tags: fts5
2015-03-11
14:51
Add an optimization to the fts5 unicode tokenizer code. check-in: f5db4892 user: dan tags: fts5
Changes
Hide Diffs Unified Diffs Show Whitespace Changes Patch

Changes to ext/fts5/fts5Int.h.

238
239
240
241
242
243
244
245
246
247
248
249
250
251
252

/*
** for(
**   pIter = sqlite3Fts5IndexQuery(p, "token", 5, 0);
**   0==sqlite3Fts5IterEof(pIter);
**   sqlite3Fts5IterNext(pIter)
** ){
**   i64 iDocid = sqlite3Fts5IndexDocid(pIter);
** }
*/

/*
** Open a new iterator to iterate through all docids that match the 
** specified token or token prefix.
*/







|







238
239
240
241
242
243
244
245
246
247
248
249
250
251
252

/*
** for(
**   pIter = sqlite3Fts5IndexQuery(p, "token", 5, 0);
**   0==sqlite3Fts5IterEof(pIter);
**   sqlite3Fts5IterNext(pIter)
** ){
**   i64 iRowid = sqlite3Fts5IterRowid(pIter);
** }
*/

/*
** Open a new iterator to iterate through all docids that match the 
** specified token or token prefix.
*/

Changes to ext/fts5/fts5_index.c.

4308
4309
4310
4311
4312
4313
4314


4315
4316
4317
4318
4319
4320
4321
....
4324
4325
4326
4327
4328
4329
4330
4331












4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343


4344
4345
4346
4347
4348
4349
4350


4351
4352
4353
4354
4355





















4356
4357
4358
4359
4360
4361

4362

4363
4364
4365
4366
4367
4368
4369
** error, or some other SQLite error code if another error (e.g. OOM)
** occurs.
*/
int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
  Fts5Config *pConfig = p->pConfig;
  int iIdx;                       /* Used to iterate through indexes */
  u64 cksum2 = 0;                 /* Checksum based on contents of indexes */



  /* Check that the internal nodes of each segment match the leaves */
  for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){
    Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
    if( pStruct ){
      int iLvl, iSeg;
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
................................................................................
          fts5IndexIntegrityCheckSegment(p, iIdx, pSeg);
        }
      }
    }
    fts5StructureRelease(pStruct);
  }

  /* Check that the checksum of the index matches the argument checksum */












  for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){
    Fts5MultiSegIter *pIter;
    Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
    for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, -1, 0, &pIter);
        fts5MultiIterEof(p, pIter)==0;
        fts5MultiIterNext(p, pIter, 0, 0)
    ){
      Fts5PosIter sPos;           /* Used to iterate through position list */
      int n;                      /* Size of term in bytes */
      i64 iRowid = fts5MultiIterRowid(pIter);
      char *z = (char*)fts5MultiIterTerm(pIter, &n);



      for(fts5PosIterInit(p, pIter, &sPos);
          fts5PosIterEof(p, &sPos)==0;
          fts5PosIterNext(p, &sPos)
      ){
        cksum2 ^= fts5IndexEntryCksum(iRowid, sPos.iCol, sPos.iPos, z, n);
#if 0
        fprintf(stdout, "rowid=%d ", (int)iRowid);


        fprintf(stdout, "term=%.*s ", n, z);
        fprintf(stdout, "col=%d ", sPos.iCol);
        fprintf(stdout, "off=%d\n", sPos.iPos);
        fflush(stdout);
#endif





















      }
    }
    fts5MultiIterFree(p, pIter);
    fts5StructureRelease(pStruct);
  }
  if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;



  return fts5IndexReturn(p);
}


/*
** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
** to the document with rowid iRowid.







>
>







 







|
>
>
>
>
>
>
>
>
>
>
>
>












>
>





<
<
>
>
|
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>






>

>







4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323
....
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364


4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402
4403
4404
4405
4406
4407
4408
** error, or some other SQLite error code if another error (e.g. OOM)
** occurs.
*/
int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
  Fts5Config *pConfig = p->pConfig;
  int iIdx;                       /* Used to iterate through indexes */
  u64 cksum2 = 0;                 /* Checksum based on contents of indexes */
  u64 cksum3 = 0;                 /* Checksum based on per-term query results */
  Fts5Buffer term = {0,0,0};      /* Buffer used to hold most recent term */

  /* Check that the internal nodes of each segment match the leaves */
  for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){
    Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
    if( pStruct ){
      int iLvl, iSeg;
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
................................................................................
          fts5IndexIntegrityCheckSegment(p, iIdx, pSeg);
        }
      }
    }
    fts5StructureRelease(pStruct);
  }

  /* The cksum argument passed to this function is a checksum calculated
  ** based on all expected entries in the FTS index (including prefix index
  ** entries). This block checks that a checksum calculated based on the
  ** actual contents of FTS index is identical.
  **
  ** Two versions of the same checksum are calculated. The first (stack
  ** variable cksum2) based on entries extracted from the full-text index
  ** while doing a linear scan of each individual index in turn. 
  **
  ** As each term is visited by the linear scans, a separate query for the
  ** same term is performed. cksum3 is calculated based on the entries
  ** extracted by these queries.
  */
  for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){
    Fts5MultiSegIter *pIter;
    Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
    for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, -1, 0, &pIter);
        fts5MultiIterEof(p, pIter)==0;
        fts5MultiIterNext(p, pIter, 0, 0)
    ){
      Fts5PosIter sPos;           /* Used to iterate through position list */
      int n;                      /* Size of term in bytes */
      i64 iRowid = fts5MultiIterRowid(pIter);
      char *z = (char*)fts5MultiIterTerm(pIter, &n);

      /* Update cksum2 with the entries associated with the current term
      ** and rowid.  */
      for(fts5PosIterInit(p, pIter, &sPos);
          fts5PosIterEof(p, &sPos)==0;
          fts5PosIterNext(p, &sPos)
      ){
        cksum2 ^= fts5IndexEntryCksum(iRowid, sPos.iCol, sPos.iPos, z, n);


      }

      /* If this is a new term, query for it. Update cksum3 with the results. */
      if( p->rc==SQLITE_OK && (term.n!=n || memcmp(term.p, z, n)) ){
        Fts5IndexIter *pIdxIter = 0;
        int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
        int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter);
        while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){
          const u8 *pPos;
          int nPos;
          i64 rowid = sqlite3Fts5IterRowid(pIdxIter);
          rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos);
          if( rc==SQLITE_OK ){
            Fts5PoslistReader sReader;
            for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader);
                sReader.bEof==0;
                sqlite3Fts5PoslistReaderNext(&sReader)
            ){
              int iCol = FTS5_POS2COLUMN(sReader.iPos);
              int iOff = FTS5_POS2OFFSET(sReader.iPos);
              cksum3 ^= fts5IndexEntryCksum(rowid, iCol, iOff, z, n);
            }
            rc = sqlite3Fts5IterNext(pIdxIter);
          }
        }
        sqlite3Fts5IterClose(pIdxIter);
        fts5BufferSet(&rc, &term, n, (const u8*)z);
        p->rc = rc;
      }
    }
    fts5MultiIterFree(p, pIter);
    fts5StructureRelease(pStruct);
  }
  if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
  if( p->rc==SQLITE_OK && cksum!=cksum3 ) p->rc = FTS5_CORRUPT;

  fts5BufferFree(&term);
  return fts5IndexReturn(p);
}


/*
** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
** to the document with rowid iRowid.