/ Check-in [70fc69ee]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Explicitly limit the size of fts5 tokens to 32768 bytes.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 70fc69eed9b09159899d7cbd1416a59d04210a63
User & Date: dan 2016-03-23 15:04:00
Context
2016-03-23
15:53
Remove an unused local variable from FTS5. check-in: 0ed693c2 user: drh tags: trunk
15:04
Explicitly limit the size of fts5 tokens to 32768 bytes. check-in: 70fc69ee user: dan tags: trunk
13:46
Update a requirement mark. No changes to code. check-in: 41298464 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5Int.h.

    43     43   ** Constants for the largest and smallest possible 64-bit signed integers.
    44     44   */
    45     45   # define LARGEST_INT64  (0xffffffff|(((i64)0x7fffffff)<<32))
    46     46   # define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64)
    47     47   
    48     48   #endif
    49     49   
           50  +/* Truncate very long tokens to this many bytes. Hard limit is 
           51  +** (65536-1-1-4-9)==65521 bytes. The limiting factor is the 16-bit offset
           52  +** field that occurs at the start of each leaf page (see fts5_index.c). */
           53  +#define FTS5_MAX_TOKEN_SIZE 32768
    50     54   
    51     55   /*
    52     56   ** Maximum number of prefix indexes on single FTS5 table. This must be
    53     57   ** less than 32. If it is set to anything larger than that, an #error
    54     58   ** directive in fts5_index.c will cause the build to fail.
    55     59   */
    56     60   #define FTS5_MAX_PREFIX_INDEXES 31

Changes to ext/fts5/fts5_expr.c.

  1489   1489     TokenCtx *pCtx = (TokenCtx*)pContext;
  1490   1490     Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
  1491   1491   
  1492   1492     UNUSED_PARAM2(iUnused1, iUnused2);
  1493   1493   
  1494   1494     /* If an error has already occurred, this is a no-op */
  1495   1495     if( pCtx->rc!=SQLITE_OK ) return pCtx->rc;
         1496  +  if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
  1496   1497   
  1497   1498     if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED) ){
  1498   1499       Fts5ExprTerm *pSyn;
  1499   1500       int nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1;
  1500   1501       pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte);
  1501   1502       if( pSyn==0 ){
  1502   1503         rc = SQLITE_NOMEM;
................................................................................
  2491   2492   ){
  2492   2493     Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx;
  2493   2494     Fts5Expr *pExpr = p->pExpr;
  2494   2495     int i;
  2495   2496   
  2496   2497     UNUSED_PARAM2(iUnused1, iUnused2);
  2497   2498   
         2499  +  if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
  2498   2500     if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++;
  2499   2501     for(i=0; i<pExpr->nPhrase; i++){
  2500   2502       Fts5ExprTerm *pTerm;
  2501   2503       if( p->aPopulator[i].bOk==0 ) continue;
  2502   2504       for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){
  2503   2505         int nTerm = (int)strlen(pTerm->zTerm);
  2504   2506         if( (nTerm==nToken || (nTerm<nToken && pTerm->bPrefix))

Changes to ext/fts5/fts5_index.c.

  2316   2316       pIter->iEndofDoclist = iTermOff + nExtra;
  2317   2317     }
  2318   2318     pIter->iPgidxOff = iPgidx;
  2319   2319   
  2320   2320     fts5SegIterLoadRowid(p, pIter);
  2321   2321     fts5SegIterLoadNPos(p, pIter);
  2322   2322   }
         2323  +
         2324  +static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
         2325  +  if( p->pIdxSelect==0 ){
         2326  +    Fts5Config *pConfig = p->pConfig;
         2327  +    fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
         2328  +          "SELECT pgno FROM '%q'.'%q_idx' WHERE "
         2329  +          "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
         2330  +          pConfig->zDb, pConfig->zName
         2331  +    ));
         2332  +  }
         2333  +  return p->pIdxSelect;
         2334  +}
  2323   2335   
  2324   2336   /*
  2325   2337   ** Initialize the object pIter to point to term pTerm/nTerm within segment
  2326   2338   ** pSeg. If there is no such term in the index, the iterator is set to EOF.
  2327   2339   **
  2328   2340   ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 
  2329   2341   ** an error has already occurred when this function is called, it is a no-op.
................................................................................
  2334   2346     int flags,                      /* Mask of FTS5INDEX_XXX flags */
  2335   2347     Fts5StructureSegment *pSeg,     /* Description of segment */
  2336   2348     Fts5SegIter *pIter              /* Object to populate */
  2337   2349   ){
  2338   2350     int iPg = 1;
  2339   2351     int bGe = (flags & FTS5INDEX_QUERY_SCAN);
  2340   2352     int bDlidx = 0;                 /* True if there is a doclist-index */
         2353  +  sqlite3_stmt *pIdxSelect = 0;
  2341   2354   
  2342   2355     assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
  2343   2356     assert( pTerm && nTerm );
  2344   2357     memset(pIter, 0, sizeof(*pIter));
  2345   2358     pIter->pSeg = pSeg;
  2346   2359   
  2347   2360     /* This block sets stack variable iPg to the leaf page number that may
  2348   2361     ** contain term (pTerm/nTerm), if it is present in the segment. */
  2349         -  if( p->pIdxSelect==0 ){
  2350         -    Fts5Config *pConfig = p->pConfig;
  2351         -    fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
  2352         -          "SELECT pgno FROM '%q'.'%q_idx' WHERE "
  2353         -          "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
  2354         -          pConfig->zDb, pConfig->zName
  2355         -    ));
  2356         -  }
         2362  +  pIdxSelect = fts5IdxSelectStmt(p);
  2357   2363     if( p->rc ) return;
  2358         -  sqlite3_bind_int(p->pIdxSelect, 1, pSeg->iSegid);
  2359         -  sqlite3_bind_blob(p->pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
  2360         -  if( SQLITE_ROW==sqlite3_step(p->pIdxSelect) ){
  2361         -    i64 val = sqlite3_column_int(p->pIdxSelect, 0);
         2364  +  sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
         2365  +  sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
         2366  +  if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
         2367  +    i64 val = sqlite3_column_int(pIdxSelect, 0);
  2362   2368       iPg = (int)(val>>1);
  2363   2369       bDlidx = (val & 0x0001);
  2364   2370     }
  2365         -  p->rc = sqlite3_reset(p->pIdxSelect);
         2371  +  p->rc = sqlite3_reset(pIdxSelect);
  2366   2372   
  2367   2373     if( iPg<pSeg->pgnoFirst ){
  2368   2374       iPg = pSeg->pgnoFirst;
  2369   2375       bDlidx = 0;
  2370   2376     }
  2371   2377   
  2372   2378     pIter->iLeafPgno = iPg - 1;
................................................................................
  3548   3554   #ifdef SQLITE_DEBUG
  3549   3555         for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
  3550   3556           for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
  3551   3557             assert( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
  3552   3558           }
  3553   3559         }
  3554   3560         assert( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );
         3561  +
         3562  +      {
         3563  +        sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
         3564  +        if( p->rc==SQLITE_OK ){
         3565  +          int rc;
         3566  +          u8 aBlob[2] = {0xff, 0xff};
         3567  +          sqlite3_bind_int(pIdxSelect, 1, iSegid);
         3568  +          sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
         3569  +          assert( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
         3570  +          p->rc = sqlite3_reset(pIdxSelect);
         3571  +        }
         3572  +      }
  3555   3573   #endif
  3556   3574       }
  3557   3575     }
  3558   3576   
  3559   3577     return iSegid;
  3560   3578   }
  3561   3579   
................................................................................
  3793   3811     }
  3794   3812   }
  3795   3813   
  3796   3814   static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  3797   3815     static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
  3798   3816     Fts5PageWriter *pPage = &pWriter->writer;
  3799   3817     i64 iRowid;
         3818  +
         3819  +static int nCall = 0;
         3820  +nCall++;
  3800   3821   
  3801   3822     assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );
  3802   3823   
  3803   3824     /* Set the szLeaf header field. */
  3804   3825     assert( 0==fts5GetU16(&pPage->buf.p[2]) );
  3805   3826     fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);
  3806   3827   

Changes to ext/fts5/fts5_storage.c.

   365    365     int nToken,                     /* Size of token in bytes */
   366    366     int iUnused1,                   /* Start offset of token */
   367    367     int iUnused2                    /* End offset of token */
   368    368   ){
   369    369     Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
   370    370     Fts5Index *pIdx = pCtx->pStorage->pIndex;
   371    371     UNUSED_PARAM2(iUnused1, iUnused2);
          372  +  if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
   372    373     if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
   373    374       pCtx->szCol++;
   374    375     }
   375    376     return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken);
   376    377   }
   377    378   
   378    379   /*
................................................................................
   811    812     int bPresent;
   812    813     int ii;
   813    814     int rc = SQLITE_OK;
   814    815     int iPos;
   815    816     int iCol;
   816    817   
   817    818     UNUSED_PARAM2(iUnused1, iUnused2);
          819  +  if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
   818    820   
   819    821     if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
   820    822       pCtx->szCol++;
   821    823     }
   822    824   
   823    825     switch( pCtx->pConfig->eDetail ){
   824    826       case FTS5_DETAIL_FULL:

Changes to ext/fts5/test/fts5simple.test.

   443    443     execsql { INSERT INTO x1(x1) VALUES('optimize'); }
   444    444     execsql { DELETE FROM x1 WHERE rowid = 4; }
   445    445   } {}
   446    446   do_execsql_test 20.2 {
   447    447     INSERT INTO x1(x1) VALUES('optimize');
   448    448     INSERT INTO x1(x1) VALUES('integrity-check');
   449    449   } {}
          450  +
          451  +#-------------------------------------------------------------------------
          452  +reset_db
          453  +set doc "a b [string repeat x 100000]"
          454  +do_execsql_test 21.0 {
          455  +  CREATE VIRTUAL TABLE x1 USING fts5(x);
          456  +  INSERT INTO x1(rowid, x) VALUES(11111, $doc);
          457  +  INSERT INTO x1(rowid, x) VALUES(11112, $doc);
          458  +}
          459  +do_execsql_test 21.1 {
          460  +  INSERT INTO x1(x1) VALUES('integrity-check');
          461  +}
          462  +do_execsql_test 21.2 {
          463  +  SELECT rowid FROM x1($doc);
          464  +} {11111 11112}
          465  +do_execsql_test 21.3 {
          466  +  DELETE FROM x1 WHERE rowid=11111;
          467  +  INSERT INTO x1(x1) VALUES('integrity-check');
          468  +  SELECT rowid FROM x1($doc);
          469  +} {11112}
   450    470   
   451    471   finish_test