/ Check-in [0cdf5028]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix a problem with fts3 prefix terms within phrase queries on "order=DESC" tables with a mix of negative and positive rowids.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts3-prefix-query-fix
Files: files | file ages | folders
SHA1: 0cdf502885ea7e5805d7ba3719f055f5d48fc78d
User & Date: dan 2015-04-06 09:05:29
Context
2015-04-06
11:04
Fix a problem with fts3 prefix terms within phrase queries on "order=DESC" tables with a mix of negative and positive rowids. check-in: 3ad829e5 user: dan tags: trunk
09:05
Fix a problem with fts3 prefix terms within phrase queries on "order=DESC" tables with a mix of negative and positive rowids. Closed-Leaf check-in: 0cdf5028 user: dan tags: fts3-prefix-query-fix
2015-04-04
16:49
Fix a problem with resolving ORDER BY clauses that feature COLLATE clauses attached to compound SELECT statements. check-in: edc1de2a user: dan tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

  2498   2498   **
  2499   2499   ** If the docids in the input doclists are sorted in ascending order,
  2500   2500   ** parameter bDescDoclist should be false. If they are sorted in descending 
  2501   2501   ** order, it should be passed a non-zero value.
  2502   2502   **
  2503   2503   ** The right-hand input doclist is overwritten by this function.
  2504   2504   */
  2505         -static void fts3DoclistPhraseMerge(
         2505  +static int fts3DoclistPhraseMerge(
  2506   2506     int bDescDoclist,               /* True if arguments are desc */
  2507   2507     int nDist,                      /* Distance from left to right (1=adjacent) */
  2508   2508     char *aLeft, int nLeft,         /* Left doclist */
  2509         -  char *aRight, int *pnRight      /* IN/OUT: Right/output doclist */
         2509  +  char **paRight, int *pnRight    /* IN/OUT: Right/output doclist */
  2510   2510   ){
  2511   2511     sqlite3_int64 i1 = 0;
  2512   2512     sqlite3_int64 i2 = 0;
  2513   2513     sqlite3_int64 iPrev = 0;
         2514  +  char *aRight = *paRight;
  2514   2515     char *pEnd1 = &aLeft[nLeft];
  2515   2516     char *pEnd2 = &aRight[*pnRight];
  2516   2517     char *p1 = aLeft;
  2517   2518     char *p2 = aRight;
  2518   2519     char *p;
  2519   2520     int bFirstOut = 0;
  2520         -  char *aOut = aRight;
         2521  +  char *aOut;
  2521   2522   
  2522   2523     assert( nDist>0 );
  2523         -
         2524  +  if( bDescDoclist ){
         2525  +    aOut = sqlite3_malloc(*pnRight + FTS3_VARINT_MAX);
         2526  +    if( aOut==0 ) return SQLITE_NOMEM;
         2527  +  }else{
         2528  +    aOut = aRight;
         2529  +  }
  2524   2530     p = aOut;
         2531  +
  2525   2532     fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
  2526   2533     fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
  2527   2534   
  2528   2535     while( p1 && p2 ){
  2529   2536       sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
  2530   2537       if( iDiff==0 ){
  2531   2538         char *pSave = p;
................................................................................
  2546   2553       }else{
  2547   2554         fts3PoslistCopy(0, &p2);
  2548   2555         fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
  2549   2556       }
  2550   2557     }
  2551   2558   
  2552   2559     *pnRight = (int)(p - aOut);
         2560  +  if( bDescDoclist ){
         2561  +    sqlite3_free(aRight);
         2562  +    *paRight = aOut;
         2563  +  }
         2564  +
         2565  +  return SQLITE_OK;
  2553   2566   }
  2554   2567   
  2555   2568   /*
  2556   2569   ** Argument pList points to a position list nList bytes in size. This
  2557   2570   ** function checks to see if the position list contains any entries for
  2558   2571   ** a token in position 0 (of any column). If so, it writes argument iDelta
  2559   2572   ** to the output buffer pOut, followed by a position list consisting only
................................................................................
  2670   2683     Fts3Table *p,                   /* FTS table handle */
  2671   2684     TermSelect *pTS,                /* TermSelect object to merge into */
  2672   2685     char *aDoclist,                 /* Pointer to doclist */
  2673   2686     int nDoclist                    /* Size of aDoclist in bytes */
  2674   2687   ){
  2675   2688     if( pTS->aaOutput[0]==0 ){
  2676   2689       /* If this is the first term selected, copy the doclist to the output
  2677         -    ** buffer using memcpy(). */
  2678         -    pTS->aaOutput[0] = sqlite3_malloc(nDoclist);
         2690  +    ** buffer using memcpy(). 
         2691  +    **
         2692  +    ** Add FTS3_VARINT_MAX bytes of unused space to the end of the 
         2693  +    ** allocation. This is so as to ensure that the buffer is big enough
         2694  +    ** to hold the current doclist AND'd with any other doclist. If the
         2695  +    ** doclists are stored in order=ASC order, this padding would not be
         2696  +    ** required (since the size of [doclistA AND doclistB] is always less
         2697  +    ** than or equal to the size of [doclistA] in that case). But this is
         2698  +    ** not true for order=DESC. For example, a doclist containing (1, -1) 
         2699  +    ** may be smaller than (-1), as in the first example the -1 may be stored
         2700  +    ** as a single-byte delta, whereas in the second it must be stored as a
         2701  +    ** FTS3_VARINT_MAX byte varint.
         2702  +    **
         2703  +    ** Similar padding is added in the fts3DoclistOrMerge() function.
         2704  +    */
         2705  +    pTS->aaOutput[0] = sqlite3_malloc(nDoclist + FTS3_VARINT_MAX + 1);
  2679   2706       pTS->anOutput[0] = nDoclist;
  2680   2707       if( pTS->aaOutput[0] ){
  2681   2708         memcpy(pTS->aaOutput[0], aDoclist, nDoclist);
  2682   2709       }else{
  2683   2710         return SQLITE_NOMEM;
  2684   2711       }
  2685   2712     }else{
................................................................................
  3927   3954   /*
  3928   3955   ** Arguments pList/nList contain the doclist for token iToken of phrase p.
  3929   3956   ** It is merged into the main doclist stored in p->doclist.aAll/nAll.
  3930   3957   **
  3931   3958   ** This function assumes that pList points to a buffer allocated using
  3932   3959   ** sqlite3_malloc(). This function takes responsibility for eventually
  3933   3960   ** freeing the buffer.
         3961  +**
         3962  +** SQLITE_OK is returned if successful, or SQLITE_NOMEM if an error occurs.
  3934   3963   */
  3935         -static void fts3EvalPhraseMergeToken(
         3964  +static int fts3EvalPhraseMergeToken(
  3936   3965     Fts3Table *pTab,                /* FTS Table pointer */
  3937   3966     Fts3Phrase *p,                  /* Phrase to merge pList/nList into */
  3938   3967     int iToken,                     /* Token pList/nList corresponds to */
  3939   3968     char *pList,                    /* Pointer to doclist */
  3940   3969     int nList                       /* Number of bytes in pList */
  3941   3970   ){
         3971  +  int rc = SQLITE_OK;
  3942   3972     assert( iToken!=p->iDoclistToken );
  3943   3973   
  3944   3974     if( pList==0 ){
  3945   3975       sqlite3_free(p->doclist.aAll);
  3946   3976       p->doclist.aAll = 0;
  3947   3977       p->doclist.nAll = 0;
  3948   3978     }
................................................................................
  3973   4003         pRight = p->doclist.aAll;
  3974   4004         nRight = p->doclist.nAll;
  3975   4005         pLeft = pList;
  3976   4006         nLeft = nList;
  3977   4007         nDiff = p->iDoclistToken - iToken;
  3978   4008       }
  3979   4009   
  3980         -    fts3DoclistPhraseMerge(pTab->bDescIdx, nDiff, pLeft, nLeft, pRight,&nRight);
         4010  +    rc = fts3DoclistPhraseMerge(
         4011  +        pTab->bDescIdx, nDiff, pLeft, nLeft, &pRight, &nRight
         4012  +    );
  3981   4013       sqlite3_free(pLeft);
  3982   4014       p->doclist.aAll = pRight;
  3983   4015       p->doclist.nAll = nRight;
  3984   4016     }
  3985   4017   
  3986   4018     if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken;
         4019  +  return rc;
  3987   4020   }
  3988   4021   
  3989   4022   /*
  3990   4023   ** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist
  3991   4024   ** does not take deferred tokens into account.
  3992   4025   **
  3993   4026   ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
................................................................................
  4005   4038       assert( pToken->pDeferred==0 || pToken->pSegcsr==0 );
  4006   4039   
  4007   4040       if( pToken->pSegcsr ){
  4008   4041         int nThis = 0;
  4009   4042         char *pThis = 0;
  4010   4043         rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis);
  4011   4044         if( rc==SQLITE_OK ){
  4012         -        fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis);
         4045  +        rc = fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis);
  4013   4046         }
  4014   4047       }
  4015   4048       assert( pToken->pSegcsr==0 );
  4016   4049     }
  4017   4050   
  4018   4051     return rc;
  4019   4052   }
................................................................................
  4807   4840           ** part of a multi-token phrase. Either way, the entire doclist will
  4808   4841           ** (eventually) be loaded into memory. It may as well be now. */
  4809   4842           Fts3PhraseToken *pToken = pTC->pToken;
  4810   4843           int nList = 0;
  4811   4844           char *pList = 0;
  4812   4845           rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
  4813   4846           assert( rc==SQLITE_OK || pList==0 );
         4847  +        if( rc==SQLITE_OK ){
         4848  +          rc = fts3EvalPhraseMergeToken(
         4849  +              pTab, pTC->pPhrase, pTC->iToken,pList,nList
         4850  +          );
         4851  +        }
  4814   4852           if( rc==SQLITE_OK ){
  4815   4853             int nCount;
  4816         -          fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);
  4817   4854             nCount = fts3DoclistCountDocids(
  4818   4855                 pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll
  4819   4856             );
  4820   4857             if( ii==0 || nCount<nMinEst ) nMinEst = nCount;
  4821   4858           }
  4822   4859         }
  4823   4860       }

Changes to test/fts3prefix.test.

   269    269     CREATE VIRTUAL TABLE t2 USING fts4(prefix=);
   270    270     INSERT INTO t1 VALUES('He dressed himself in cycling clothes');
   271    271     INSERT INTO t2 VALUES('He dressed himself in cycling clothes');
   272    272   } {}
   273    273   do_execsql_test 6.5.2 {
   274    274     SELECT md5sum(quote(root)) FROM t1_segdir;
   275    275   } [db eval {SELECT md5sum(quote(root)) FROM t2_segdir}]
          276  +
          277  +
          278  +do_execsql_test 7.0 {
          279  +  CREATE VIRTUAL TABLE t6 USING fts4(x,order=DESC);
          280  +  INSERT INTO t6(docid, x) VALUES(-1,'a b');
          281  +  INSERT INTO t6(docid, x) VALUES(1, 'b');
          282  +}
          283  +do_execsql_test 7.1 {
          284  +  SELECT docid FROM t6 WHERE t6 MATCH '"a* b"';
          285  +} {-1}
          286  +do_execsql_test 7.2 {
          287  +  SELECT docid FROM t6 WHERE t6 MATCH 'a*';
          288  +} {-1}
          289  +do_execsql_test 7.3 {
          290  +  SELECT docid FROM t6 WHERE t6 MATCH 'a* b';
          291  +} {-1}
          292  +
          293  +
   276    294   
   277    295   finish_test