/ Check-in [d59f5809]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix another problem in stat4 sample selection.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: d59f580904e6e7e90fc0a692a3dd4eeff5942479
User & Date: dan 2013-09-03 14:43:12
Context
2013-09-03
19:26
Harden the STAT4 logic in where.c against OOM faults. check-in: 91d2cfbc user: drh tags: trunk
14:43
Fix another problem in stat4 sample selection. check-in: d59f5809 user: dan tags: trunk
14:03
Make sure the omit-noop-left-join optimization is not applied if columns of the LEFT JOIN are used in the ORDER BY clause. Ticket [be84e357c035] check-in: 0303d6bc user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/analyze.c.

   378    378     statInit,        /* xFunc */
   379    379     0,               /* xStep */
   380    380     0,               /* xFinalize */
   381    381     "stat_init",     /* zName */
   382    382     0,               /* pHash */
   383    383     0                /* pDestructor */
   384    384   };
          385  +
          386  +#ifdef SQLITE_ENABLE_STAT4
          387  +/*
          388  +** pNew and pOld are both candidate non-periodic samples selected for 
          389  +** the same column (pNew->iCol==pOld->iCol). Ignoring this column and 
          390  +** considering only any trailing columns and the sample hash value, this
          391  +** function returns true if sample pNew is to be preferred over pOld.
          392  +** In other words, if we assume that the cardinalities of the selected
          393  +** column for pNew and pOld are equal, is pNew to be preferred over pOld.
          394  +**
          395  +** This function assumes that for each argument sample, the contents of
          396  +** the anEq[] array from pSample->anEq[pSample->iCol+1] onwards are valid. 
          397  +*/
          398  +static int sampleIsBetterPost(
          399  +  Stat4Accum *pAccum, 
          400  +  Stat4Sample *pNew, 
          401  +  Stat4Sample *pOld
          402  +){
          403  +  int nCol = pAccum->nCol;
          404  +  int i;
          405  +  assert( pNew->iCol==pOld->iCol );
          406  +  for(i=pNew->iCol+1; i<nCol; i++){
          407  +    if( pNew->anEq[i]>pOld->anEq[i] ) return 1;
          408  +    if( pNew->anEq[i]<pOld->anEq[i] ) return 0;
          409  +  }
          410  +  if( pNew->iHash>pOld->iHash ) return 1;
          411  +  return 0;
          412  +}
          413  +#endif
   385    414   
   386    415   #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
   387    416   /*
   388    417   ** Return true if pNew is to be preferred over pOld.
          418  +**
          419  +** This function assumes that for each argument sample, the contents of
          420  +** the anEq[] array from pSample->anEq[pSample->iCol] onwards are valid. 
   389    421   */
   390         -static int sampleIsBetter(Stat4Sample *pNew, Stat4Sample *pOld){
          422  +static int sampleIsBetter(
          423  +  Stat4Accum *pAccum, 
          424  +  Stat4Sample *pNew, 
          425  +  Stat4Sample *pOld
          426  +){
   391    427     tRowcnt nEqNew = pNew->anEq[pNew->iCol];
   392    428     tRowcnt nEqOld = pOld->anEq[pOld->iCol];
   393    429   
   394    430     assert( pOld->isPSample==0 && pNew->isPSample==0 );
   395    431     assert( IsStat4 || (pNew->iCol==0 && pOld->iCol==0) );
   396    432   
   397         -  if( (nEqNew>nEqOld)
   398         -   || (nEqNew==nEqOld && pNew->iCol<pOld->iCol)
   399         -   || (nEqNew==nEqOld && pNew->iCol==pOld->iCol && pNew->iHash>pOld->iHash)
   400         -  ){
   401         -    return 1;
          433  +  if( (nEqNew>nEqOld) ) return 1;
          434  +#ifdef SQLITE_ENABLE_STAT4
          435  +  if( nEqNew==nEqOld ){
          436  +    if( pNew->iCol<pOld->iCol ) return 1;
          437  +    return (pNew->iCol==pOld->iCol && sampleIsBetterPost(pAccum, pNew, pOld));
   402    438     }
   403    439     return 0;
          440  +#else
          441  +  return (nEqNew==nEqOld && pNew->iHash>pOld->iHash);
          442  +#endif
   404    443   }
   405    444   
   406    445   /*
   407    446   ** Copy the contents of object (*pFrom) into (*pTo).
   408    447   */
   409    448   void sampleCopy(Stat4Accum *p, Stat4Sample *pTo, Stat4Sample *pFrom){
   410    449     pTo->iRowid = pFrom->iRowid;
................................................................................
   419    458   /*
   420    459   ** Copy the contents of sample *pNew into the p->a[] array. If necessary,
   421    460   ** remove the least desirable sample from p->a[] to make room.
   422    461   */
   423    462   static void sampleInsert(Stat4Accum *p, Stat4Sample *pNew, int nEqZero){
   424    463     Stat4Sample *pSample;
   425    464     int i;
   426         -  i64 iSeq;
   427         -  int iPos;
   428    465   
   429    466     assert( IsStat4 || nEqZero==0 );
   430    467   
   431    468     if( pNew->isPSample==0 ){
   432    469       Stat4Sample *pUpgrade = 0;
   433    470       assert( pNew->anEq[pNew->iCol]>0 );
   434    471   
................................................................................
   437    474       ** added a sample that shares this prefix, there is no need to add
   438    475       ** this one. Instead, upgrade the priority of the highest priority
   439    476       ** existing sample that shares this prefix.  */
   440    477       for(i=p->nSample-1; i>=0; i--){
   441    478         Stat4Sample *pOld = &p->a[i];
   442    479         if( pOld->anEq[pNew->iCol]==0 ){
   443    480           if( pOld->isPSample ) return;
   444         -        assert( sampleIsBetter(pNew, pOld) );
   445         -        if( pUpgrade==0 || sampleIsBetter(pOld, pUpgrade) ){
          481  +        assert( pOld->iCol>pNew->iCol );
          482  +        assert( sampleIsBetter(p, pNew, pOld) );
          483  +        if( pUpgrade==0 || sampleIsBetter(p, pOld, pUpgrade) ){
   446    484             pUpgrade = pOld;
   447    485           }
   448    486         }
   449    487       }
   450    488       if( pUpgrade ){
   451    489         pUpgrade->iCol = pNew->iCol;
   452    490         pUpgrade->anEq[pUpgrade->iCol] = pNew->anEq[pUpgrade->iCol];
................................................................................
   476    514          || pNew->anLt[p->nCol-1] > p->a[p->nSample-1].anLt[p->nCol-1] );
   477    515   #endif
   478    516   
   479    517     /* Insert the new sample */
   480    518     pSample = &p->a[p->nSample];
   481    519     sampleCopy(p, pSample, pNew);
   482    520     p->nSample++;
   483         -
   484         -#if 0
   485         -  iSeq = pNew->anLt[p->nCol-1];
   486         -  for(iPos=p->nSample; iPos>0; iPos--){
   487         -    if( iSeq>p->a[iPos-1].anLt[p->nCol-1] ) break;
   488         -  }
   489         -
   490         -  if( iPos!=p->nSample ){
   491         -    Stat4Sample *pEnd = &p->a[p->nSample];
   492         -    tRowcnt *anEq = pEnd->anEq;
   493         -    tRowcnt *anLt = pEnd->anLt;
   494         -    tRowcnt *anDLt = pEnd->anDLt;
   495         -    memmove(&p->a[iPos], &p->a[iPos+1], (p->nSample-iPos)*sizeof(p->a[0]));
   496         -    pSample->anEq = anEq;
   497         -    pSample->anDLt = anDLt;
   498         -    pSample->anLt = anLt;
   499         -  }
   500         -#endif
   501         -
   502    521   
   503    522     /* Zero the first nEqZero entries in the anEq[] array. */
   504    523     memset(pSample->anEq, 0, sizeof(tRowcnt)*nEqZero);
   505    524   
   506    525    find_new_min:
   507    526     if( p->nSample>=p->mxSample ){
   508    527       int iMin = -1;
   509    528       for(i=0; i<p->mxSample; i++){
   510    529         if( p->a[i].isPSample ) continue;
   511         -      if( iMin<0 || sampleIsBetter(&p->a[iMin], &p->a[i]) ){
          530  +      if( iMin<0 || sampleIsBetter(p, &p->a[iMin], &p->a[i]) ){
   512    531           iMin = i;
   513    532         }
   514    533       }
   515    534       assert( iMin>=0 );
   516    535       p->iMin = iMin;
   517    536     }
   518    537   }
................................................................................
   528    547   #ifdef SQLITE_ENABLE_STAT4
   529    548     int i;
   530    549   
   531    550     /* Check if any samples from the aBest[] array should be pushed
   532    551     ** into IndexSample.a[] at this point.  */
   533    552     for(i=(p->nCol-2); i>=iChng; i--){
   534    553       Stat4Sample *pBest = &p->aBest[i];
   535         -    if( p->nSample<p->mxSample
   536         -     || sampleIsBetter(pBest, &p->a[p->iMin])
   537         -    ){
          554  +    pBest->anEq[i] = p->current.anEq[i];
          555  +    if( p->nSample<p->mxSample || sampleIsBetter(p, pBest, &p->a[p->iMin]) ){
   538    556         sampleInsert(p, pBest, i);
   539    557       }
   540    558     }
   541    559   
   542    560     /* Update the anEq[] fields of any samples already collected. */
   543    561     for(i=p->nSample-1; i>=0; i--){
   544    562       int j;
................................................................................
   557    575       if( (nLt/p->nPSample)!=(nLt+nEq)/p->nPSample ){
   558    576         p->current.isPSample = 1;
   559    577         sampleInsert(p, &p->current, 0);
   560    578         p->current.isPSample = 0;
   561    579       }else 
   562    580   
   563    581       /* Or if it is a non-periodic sample. Add it in this case too. */
   564         -    if( p->nSample<p->mxSample || sampleIsBetter(&p->current, &p->a[p->iMin]) ){
          582  +    if( p->nSample<p->mxSample 
          583  +     || sampleIsBetter(p, &p->current, &p->a[p->iMin]) 
          584  +    ){
   565    585         sampleInsert(p, &p->current, 0);
   566    586       }
   567    587     }
   568    588   #endif
   569    589   }
   570    590   
   571    591   /*
................................................................................
   631    651         sampleInsert(p, &p->current, p->nCol-1);
   632    652         p->current.isPSample = 0;
   633    653       }
   634    654   
   635    655       /* Update the aBest[] array. */
   636    656       for(i=0; i<(p->nCol-1); i++){
   637    657         p->current.iCol = i;
   638         -      if( i>=iChng || sampleIsBetter(&p->current, &p->aBest[i]) ){
          658  +      if( i>=iChng || sampleIsBetterPost(p, &p->current, &p->aBest[i]) ){
   639    659           sampleCopy(p, &p->aBest[i], &p->current);
   640    660         }
   641    661       }
   642    662     }
   643    663   #endif
   644    664   }
   645    665   static const FuncDef statPushFuncdef = {

Changes to test/analyze9.test.

   240    240       neq,
   241    241       lrange(nlt, 0, 2),
   242    242       lrange(ndlt, 0, 2),
   243    243       lrange(test_decode(sample), 0, 1)
   244    244       FROM sqlite_stat4
   245    245     ORDER BY rowid DESC LIMIT 2;
   246    246   } {
   247         -  {2 1 1 1} {295 296 296} {120 122 125} {201 4} 
          247  +  {2 1 1 1} {295 295 295} {120 121 124} {201 3} 
   248    248     {5 3 1 1} {290 290 292} {119 119 121} {200 1}
   249    249   }
   250    250   
   251    251   do_execsql_test 4.4 { SELECT count(DISTINCT c) FROM t1 WHERE c<201 } 120
   252    252   do_execsql_test 4.5 { SELECT count(DISTINCT c) FROM t1 WHERE c<200 } 119
   253    253   
   254    254   # Check that the perioidic samples are present.
................................................................................
   911    911       execsql {
   912    912         CREATE TABLE t1(x, y);
   913    913         CREATE VIEW v1 AS SELECT * FROM t1;
   914    914       }
   915    915       catchsql ANALYZE
   916    916     } {1 {not authorized}}
   917    917   }
          918  +
          919  +#-------------------------------------------------------------------------
          920  +#
          921  +reset_db
          922  +proc r {args} { expr rand() }
          923  +db func r r
          924  +db func lrange lrange
          925  +do_test 20.1 {
          926  +  execsql {
          927  +    CREATE TABLE t1(a,b,c,d);
          928  +    CREATE INDEX i1 ON t1(a,b,c,d);
          929  +  }
          930  +  for {set i 0} {$i < 16} {incr i} {
          931  +    execsql {
          932  +      INSERT INTO t1 VALUES($i, r(), r(), r());
          933  +      INSERT INTO t1 VALUES($i, $i,  r(), r());
          934  +      INSERT INTO t1 VALUES($i, $i,  $i,  r());
          935  +      INSERT INTO t1 VALUES($i, $i,  $i,  $i);
          936  +      INSERT INTO t1 VALUES($i, $i,  $i,  $i);
          937  +      INSERT INTO t1 VALUES($i, $i,  $i,  r());
          938  +      INSERT INTO t1 VALUES($i, $i,  r(), r());
          939  +      INSERT INTO t1 VALUES($i, r(), r(), r());
          940  +    }
          941  +  }
          942  +} {}
          943  +do_execsql_test 20.2 { ANALYZE }
          944  +for {set i 0} {$i<16} {incr i} {
          945  +    set val "$i $i $i $i"
          946  +    do_execsql_test 20.3.$i {
          947  +      SELECT count(*) FROM sqlite_stat4 
          948  +      WHERE lrange(test_decode(sample), 0, 3)=$val
          949  +    } {1}
          950  +}
   918    951   
   919    952   finish_test
   920    953