/ Check-in [2daab6bd]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Merge together the fork in the query-planner-tweaks branch.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | query-planner-tweaks
Files: files | file ages | folders
SHA1: 2daab6bd42b34b32de46db513437cc4d6ca17975
User & Date: drh 2011-08-06 02:03:10
Context
2011-08-06
19:48
The sqlite_stat2.cnt field is parsed if it is present. But it is not yet used. A large comment added to analyze.c to explain the format of the ANALYZE system tables. check-in: 6d1e2372 user: drh tags: query-planner-tweaks
02:03
Merge together the fork in the query-planner-tweaks branch. check-in: 2daab6bd user: drh tags: query-planner-tweaks
01:22
Fix a compiler warning when STAT2 is off. More test cases. Fix legacy tests to deal with the new STAT2 logic. check-in: 7e914aa9 user: drh tags: query-planner-tweaks
2011-08-05
01:09
Allow the sqlite_stat2 table to contain a number of samples other than the default of 10. check-in: b9d41c34 user: drh tags: query-planner-tweaks
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/analyze.c.

586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
...
636
637
638
639
640
641
642

643
644
645
646
647
648
649
...
665
666
667
668
669
670
671
672

673
674
675
676
677
678
679
680
681
682
683



684
685

686
687
688
689
690
691

692

693
694
695


696
697
698
699

700
701
702
703

704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
** If the Index.aSample variable is not NULL, delete the aSample[] array
** and its contents.
*/
void sqlite3DeleteIndexSamples(sqlite3 *db, Index *pIdx){
#ifdef SQLITE_ENABLE_STAT2
  if( pIdx->aSample ){
    int j;
    for(j=0; j<SQLITE_INDEX_SAMPLES; j++){
      IndexSample *p = &pIdx->aSample[j];
      if( p->eType==SQLITE_TEXT || p->eType==SQLITE_BLOB ){
        sqlite3DbFree(db, p->u.z);
      }
    }
    sqlite3DbFree(db, pIdx->aSample);
  }
................................................................................
  /* Clear any prior statistics */
  assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
  for(i=sqliteHashFirst(&db->aDb[iDb].pSchema->idxHash);i;i=sqliteHashNext(i)){
    Index *pIdx = sqliteHashData(i);
    sqlite3DefaultRowEst(pIdx);
    sqlite3DeleteIndexSamples(db, pIdx);
    pIdx->aSample = 0;

  }

  /* Check to make sure the sqlite_stat1 table exists */
  sInfo.db = db;
  sInfo.zDatabase = db->aDb[iDb].zName;
  if( sqlite3FindTable(db, "sqlite_stat1", sInfo.zDatabase)==0 ){
    return SQLITE_ERROR;
................................................................................
  if( rc==SQLITE_OK && !sqlite3FindTable(db, "sqlite_stat2", sInfo.zDatabase) ){
    rc = SQLITE_ERROR;
  }
  if( rc==SQLITE_OK ){
    sqlite3_stmt *pStmt = 0;

    zSql = sqlite3MPrintf(db, 
        "SELECT idx,sampleno,sample FROM %Q.sqlite_stat2", sInfo.zDatabase);

    if( !zSql ){
      rc = SQLITE_NOMEM;
    }else{
      rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
      sqlite3DbFree(db, zSql);
    }

    if( rc==SQLITE_OK ){
      while( sqlite3_step(pStmt)==SQLITE_ROW ){
        char *zIndex;   /* Index name */
        Index *pIdx;    /* Pointer to the index object */




        zIndex = (char *)sqlite3_column_text(pStmt, 0);

        pIdx = zIndex ? sqlite3FindIndex(db, zIndex, sInfo.zDatabase) : 0;
        if( pIdx ){
          int iSample = sqlite3_column_int(pStmt, 1);
          if( iSample<SQLITE_INDEX_SAMPLES && iSample>=0 ){
            int eType = sqlite3_column_type(pStmt, 2);


            if( pIdx->aSample==0 ){

              static const int sz = sizeof(IndexSample)*SQLITE_INDEX_SAMPLES;
              pIdx->aSample = (IndexSample *)sqlite3DbMallocRaw(0, sz);
              if( pIdx->aSample==0 ){


                db->mallocFailed = 1;
                break;
              }
	      memset(pIdx->aSample, 0, sz);

            }

            assert( pIdx->aSample );
            {

              IndexSample *pSample = &pIdx->aSample[iSample];
              pSample->eType = (u8)eType;
              if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
                pSample->u.r = sqlite3_column_double(pStmt, 2);
              }else if( eType==SQLITE_TEXT || eType==SQLITE_BLOB ){
                const char *z = (const char *)(
                    (eType==SQLITE_BLOB) ?
                    sqlite3_column_blob(pStmt, 2):
                    sqlite3_column_text(pStmt, 2)
                );
                int n = sqlite3_column_bytes(pStmt, 2);
                if( n>24 ){
                  n = 24;
                }
                pSample->nByte = (u8)n;
                if( n < 1){
                  pSample->u.z = 0;
                }else{
                  pSample->u.z = sqlite3DbStrNDup(0, z, n);
                  if( pSample->u.z==0 ){
                    db->mallocFailed = 1;
                    break;
                  }
                }
              }
            }
          }
        }
      }
      rc = sqlite3_finalize(pStmt);
    }
  }







|







 







>







 







|
>











>
>
>


>
|
|
|
<
<
<
>
|
>
|
<
<
>
>
|
|
|
|
>
|
<
<
<
>
|
|
|
|
|
|
|
|
|
|
|
|
<
<
|
|
|
|
|
|
|
|
<
<
<







586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
...
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
...
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694



695
696
697
698


699
700
701
702
703
704
705
706



707
708
709
710
711
712
713
714
715
716
717
718
719


720
721
722
723
724
725
726
727



728
729
730
731
732
733
734
** If the Index.aSample variable is not NULL, delete the aSample[] array
** and its contents.
*/
void sqlite3DeleteIndexSamples(sqlite3 *db, Index *pIdx){
#ifdef SQLITE_ENABLE_STAT2
  if( pIdx->aSample ){
    int j;
    for(j=0; j<pIdx->nSample; j++){
      IndexSample *p = &pIdx->aSample[j];
      if( p->eType==SQLITE_TEXT || p->eType==SQLITE_BLOB ){
        sqlite3DbFree(db, p->u.z);
      }
    }
    sqlite3DbFree(db, pIdx->aSample);
  }
................................................................................
  /* Clear any prior statistics */
  assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
  for(i=sqliteHashFirst(&db->aDb[iDb].pSchema->idxHash);i;i=sqliteHashNext(i)){
    Index *pIdx = sqliteHashData(i);
    sqlite3DefaultRowEst(pIdx);
    sqlite3DeleteIndexSamples(db, pIdx);
    pIdx->aSample = 0;
    pIdx->nSample = 0;
  }

  /* Check to make sure the sqlite_stat1 table exists */
  sInfo.db = db;
  sInfo.zDatabase = db->aDb[iDb].zName;
  if( sqlite3FindTable(db, "sqlite_stat1", sInfo.zDatabase)==0 ){
    return SQLITE_ERROR;
................................................................................
  if( rc==SQLITE_OK && !sqlite3FindTable(db, "sqlite_stat2", sInfo.zDatabase) ){
    rc = SQLITE_ERROR;
  }
  if( rc==SQLITE_OK ){
    sqlite3_stmt *pStmt = 0;

    zSql = sqlite3MPrintf(db, 
        "SELECT idx, sampleno, sample FROM %Q.sqlite_stat2"
        " ORDER BY rowid DESC", sInfo.zDatabase);
    if( !zSql ){
      rc = SQLITE_NOMEM;
    }else{
      rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
      sqlite3DbFree(db, zSql);
    }

    if( rc==SQLITE_OK ){
      while( sqlite3_step(pStmt)==SQLITE_ROW ){
        char *zIndex;   /* Index name */
        Index *pIdx;    /* Pointer to the index object */
        int iSample;
        int eType;
        IndexSample *pSample;

        zIndex = (char *)sqlite3_column_text(pStmt, 0);
        if( zIndex==0 ) continue;
        pIdx = sqlite3FindIndex(db, zIndex, sInfo.zDatabase);
        if( pIdx==0 ) continue;
        iSample = sqlite3_column_int(pStmt, 1);



        if( iSample>=SQLITE_MAX_SAMPLES || iSample<0 ) continue;
        if( pIdx->nSample<=iSample ){
          IndexSample *pNew;
          int sz = sizeof(IndexSample)*(iSample+1);


          pNew = (IndexSample*)sqlite3Realloc(pIdx->aSample, sz);
          if( pNew==0 ){
            db->mallocFailed = 1;
            break;
          }
          pIdx->aSample = pNew;
          pIdx->nSample = iSample+1;
        }



        eType = sqlite3_column_type(pStmt, 2);
        pSample = &pIdx->aSample[iSample];
        pSample->eType = (u8)eType;
        if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
          pSample->u.r = sqlite3_column_double(pStmt, 2);
        }else if( eType==SQLITE_TEXT || eType==SQLITE_BLOB ){
          const char *z = (const char *)(
             (eType==SQLITE_BLOB) ?
              sqlite3_column_blob(pStmt, 2):
              sqlite3_column_text(pStmt, 2)
          );
          int n = sqlite3_column_bytes(pStmt, 2);
          if( n>24 ) n = 24;


          pSample->nByte = (u8)n;
          if( n < 1){
            pSample->u.z = 0;
          }else{
            pSample->u.z = sqlite3DbStrNDup(0, z, n);
            if( pSample->u.z==0 ){
              db->mallocFailed = 1;
              break;



            }
          }
        }
      }
      rc = sqlite3_finalize(pStmt);
    }
  }

Changes to src/sqliteInt.h.

77
78
79
80
81
82
83

84


85
86
87
88
89
90
91
....
1479
1480
1481
1482
1483
1484
1485

1486
1487
1488
1489
1490
1491
1492
#endif

/*
** The number of samples of an index that SQLite takes in order to 
** construct a histogram of the table content when running ANALYZE
** and with SQLITE_ENABLE_STAT2
*/

#define SQLITE_INDEX_SAMPLES 10



/*
** The following macros are used to cast pointers to integers and
** integers to pointers.  The way you do this varies from one compiler
** to the next, so we have developed the following set of #if statements
** to generate appropriate macros for a wide range of compilers.
**
................................................................................
  int *aiColumn;   /* Which columns are used by this index.  1st is 0 */
  unsigned *aiRowEst; /* Result of ANALYZE: Est. rows selected by each column */
  Table *pTable;   /* The SQL table being indexed */
  int tnum;        /* Page containing root of this index in database file */
  u8 onError;      /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */
  u8 autoIndex;    /* True if is automatically created (ex: by UNIQUE) */
  u8 bUnordered;   /* Use this index for == or IN queries only */

  char *zColAff;   /* String defining the affinity of each column */
  Index *pNext;    /* The next index associated with the same table */
  Schema *pSchema; /* Schema containing this index */
  u8 *aSortOrder;  /* Array of size Index.nColumn. True==DESC, False==ASC */
  char **azColl;   /* Array of collation sequence names for index */
  IndexSample *aSample;    /* Array of SQLITE_INDEX_SAMPLES samples */
};







>
|
>
>







 







>







77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
....
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
#endif

/*
** The number of samples of an index that SQLite takes in order to 
** construct a histogram of the table content when running ANALYZE
** and with SQLITE_ENABLE_STAT2
*/
#ifndef SQLITE_INDEX_SAMPLES
# define SQLITE_INDEX_SAMPLES 10
#endif
#define SQLITE_MAX_SAMPLES 100

/*
** The following macros are used to cast pointers to integers and
** integers to pointers.  The way you do this varies from one compiler
** to the next, so we have developed the following set of #if statements
** to generate appropriate macros for a wide range of compilers.
**
................................................................................
  int *aiColumn;   /* Which columns are used by this index.  1st is 0 */
  unsigned *aiRowEst; /* Result of ANALYZE: Est. rows selected by each column */
  Table *pTable;   /* The SQL table being indexed */
  int tnum;        /* Page containing root of this index in database file */
  u8 onError;      /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */
  u8 autoIndex;    /* True if is automatically created (ex: by UNIQUE) */
  u8 bUnordered;   /* Use this index for == or IN queries only */
  u8 nSample;      /* Number of slots in aSample[] */
  char *zColAff;   /* String defining the affinity of each column */
  Index *pNext;    /* The next index associated with the same table */
  Schema *pSchema; /* Schema containing this index */
  u8 *aSortOrder;  /* Array of size Index.nColumn. True==DESC, False==ASC */
  char **azColl;   /* Array of collation sequence names for index */
  IndexSample *aSample;    /* Array of SQLITE_INDEX_SAMPLES samples */
};

Changes to src/where.c.

2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
....
2449
2450
2451
2452
2453
2454
2455

2456
2457
2458

2459
2460
2461

2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
....
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
....
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
....
2622
2623
2624
2625
2626
2627
2628

2629
2630
2631
2632
2633
2634
2635
2636
....
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667

2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
....
2714
2715
2716
2717
2718
2719
2720

2721
2722
2723
2724
2725
2726
2727
....
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
....
2772
2773
2774
2775
2776
2777
2778
2779

2780
2781
2782
2783

2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
....
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
  */
  bestOrClauseIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost);
}
#endif /* SQLITE_OMIT_VIRTUALTABLE */

/*
** Argument pIdx is a pointer to an index structure that has an array of
** SQLITE_INDEX_SAMPLES evenly spaced samples of the first indexed column
** stored in Index.aSample. These samples divide the domain of values stored
** the index into (SQLITE_INDEX_SAMPLES+1) regions.
** Region 0 contains all values less than the first sample value. Region
** 1 contains values between the first and second samples.  Region 2 contains
** values between samples 2 and 3.  And so on.  Region SQLITE_INDEX_SAMPLES
** contains values larger than the last sample.
**
** If the index contains many duplicates of a single value, then it is
** possible that two or more adjacent samples can hold the same value.
** When that is the case, the smallest possible region code is returned
** when roundUp is false and the largest possible region code is returned
** when roundUp is true.
**
** If successful, this function determines which of the regions value 
** pVal lies in, sets *piRegion to the region index (a value between 0
** and SQLITE_INDEX_SAMPLES+1, inclusive) and returns SQLITE_OK.
** Or, if an OOM occurs while converting text values between encodings,
** SQLITE_NOMEM is returned and *piRegion is undefined.
*/
#ifdef SQLITE_ENABLE_STAT2
static int whereRangeRegion(
  Parse *pParse,              /* Database connection */
  Index *pIdx,                /* Index to consider domain of */
................................................................................
  sqlite3_value *pVal,        /* Value to consider */
  int roundUp,                /* Return largest valid region if true */
  int *piRegion               /* OUT: Region of domain in which value lies */
){
  assert( roundUp==0 || roundUp==1 );
  if( ALWAYS(pVal) ){
    IndexSample *aSample = pIdx->aSample;

    int i = 0;
    int eType = sqlite3_value_type(pVal);


    if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
      double r = sqlite3_value_double(pVal);
      for(i=0; i<SQLITE_INDEX_SAMPLES; i++){

        if( aSample[i].eType==SQLITE_NULL ) continue;
        if( aSample[i].eType>=SQLITE_TEXT ) break;
        if( roundUp ){
          if( aSample[i].u.r>r ) break;
        }else{
          if( aSample[i].u.r>=r ) break;
        }
      }
    }else if( eType==SQLITE_NULL ){
      i = 0;
      if( roundUp ){
        while( i<SQLITE_INDEX_SAMPLES && aSample[i].eType==SQLITE_NULL ) i++;
      }
    }else{ 
      sqlite3 *db = pParse->db;
      CollSeq *pColl;
      const u8 *z;
      int n;

................................................................................
        if( !z ){
          return SQLITE_NOMEM;
        }
        assert( z && pColl && pColl->xCmp );
      }
      n = sqlite3ValueBytes(pVal, pColl->enc);

      for(i=0; i<SQLITE_INDEX_SAMPLES; i++){
        int c;
        int eSampletype = aSample[i].eType;
        if( eSampletype==SQLITE_NULL || eSampletype<eType ) continue;
        if( (eSampletype!=eType) ) break;
#ifndef SQLITE_OMIT_UTF16
        if( pColl->enc!=SQLITE_UTF8 ){
          int nSample;
................................................................................
        {
          c = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z);
        }
        if( c-roundUp>=0 ) break;
      }
    }

    assert( i>=0 && i<=SQLITE_INDEX_SAMPLES );
    *piRegion = i;
  }
  return SQLITE_OK;
}
#endif   /* #ifdef SQLITE_ENABLE_STAT2 */

/*
................................................................................
#ifdef SQLITE_ENABLE_STAT2

  if( nEq==0 && p->aSample ){
    sqlite3_value *pLowerVal = 0;
    sqlite3_value *pUpperVal = 0;
    int iEst;
    int iLower = 0;

    int iUpper = SQLITE_INDEX_SAMPLES;
    int roundUpUpper = 0;
    int roundUpLower = 0;
    u8 aff = p->pTable->aCol[p->aiColumn[0]].affinity;

    if( pLower ){
      Expr *pExpr = pLower->pExpr->pRight;
      rc = valueFromExpr(pParse, pExpr, aff, &pLowerVal);
................................................................................
      sqlite3ValueFree(pUpperVal);
      goto range_est_fallback;
    }else if( pLowerVal==0 ){
      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
      if( pLower ) iLower = iUpper/2;
    }else if( pUpperVal==0 ){
      rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
      if( pUpper ) iUpper = (iLower + SQLITE_INDEX_SAMPLES + 1)/2;
    }else{
      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
      if( rc==SQLITE_OK ){
        rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
      }
    }
    WHERETRACE(("range scan regions: %d..%d\n", iLower, iUpper));

    iEst = iUpper - iLower;
    testcase( iEst==SQLITE_INDEX_SAMPLES );
    assert( iEst<=SQLITE_INDEX_SAMPLES );

    if( iEst<1 ){
      *piEst = 50/SQLITE_INDEX_SAMPLES;
    }else{
      *piEst = (iEst*100)/SQLITE_INDEX_SAMPLES;
    }
    sqlite3ValueFree(pLowerVal);
    sqlite3ValueFree(pUpperVal);
    return rc;
  }
range_est_fallback:
#else
................................................................................
  sqlite3_value *pRhs = 0;  /* VALUE on right-hand side of pTerm */
  int iLower, iUpper;       /* Range of histogram regions containing pRhs */
  u8 aff;                   /* Column affinity */
  int rc;                   /* Subfunction return code */
  double nRowEst;           /* New estimate of the number of rows */

  assert( p->aSample!=0 );

  aff = p->pTable->aCol[p->aiColumn[0]].affinity;
  if( pExpr ){
    rc = valueFromExpr(pParse, pExpr, aff, &pRhs);
    if( rc ) goto whereEqualScanEst_cancel;
  }else{
    pRhs = sqlite3ValueNew(pParse->db);
  }
................................................................................
  if( pRhs==0 ) return SQLITE_NOTFOUND;
  rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
  if( rc ) goto whereEqualScanEst_cancel;
  rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
  if( rc ) goto whereEqualScanEst_cancel;
  WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper));
  if( iLower>=iUpper ){
    nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2);
    if( nRowEst<*pnRow ) *pnRow = nRowEst;
  }else{
    nRowEst = (iUpper-iLower)*p->aiRowEst[0]/SQLITE_INDEX_SAMPLES;
    *pnRow = nRowEst;
  }

whereEqualScanEst_cancel:
  sqlite3ValueFree(pRhs);
  return rc;
}
................................................................................
  int iLower, iUpper;       /* Range of histogram regions containing pRhs */
  u8 aff;                   /* Column affinity */
  int rc = SQLITE_OK;       /* Subfunction return code */
  double nRowEst;           /* New estimate of the number of rows */
  int nSpan = 0;            /* Number of histogram regions spanned */
  int nSingle = 0;          /* Histogram regions hit by a single value */
  int nNotFound = 0;        /* Count of values that are not constants */
  int i;                               /* Loop counter */

  u8 aSpan[SQLITE_INDEX_SAMPLES+1];    /* Histogram regions that are spanned */
  u8 aSingle[SQLITE_INDEX_SAMPLES+1];  /* Histogram regions hit once */

  assert( p->aSample!=0 );

  aff = p->pTable->aCol[p->aiColumn[0]].affinity;
  memset(aSpan, 0, sizeof(aSpan));
  memset(aSingle, 0, sizeof(aSingle));
  for(i=0; i<pList->nExpr; i++){
    sqlite3ValueFree(pVal);
    rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal);
    if( rc ) break;
    if( pVal==0 || sqlite3_value_type(pVal)==SQLITE_NULL ){
      nNotFound++;
      continue;
................................................................................
    rc = whereRangeRegion(pParse, p, pVal, 0, &iLower);
    if( rc ) break;
    rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper);
    if( rc ) break;
    if( iLower>=iUpper ){
      aSingle[iLower] = 1;
    }else{
      assert( iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES );
      while( iLower<iUpper ) aSpan[iLower++] = 1;
    }
  }
  if( rc==SQLITE_OK ){
    for(i=nSpan=0; i<=SQLITE_INDEX_SAMPLES; i++){
      if( aSpan[i] ){
        nSpan++;
      }else if( aSingle[i] ){
        nSingle++;
      }
    }
    nRowEst = (nSpan*2+nSingle)*p->aiRowEst[0]/(2*SQLITE_INDEX_SAMPLES)
               + nNotFound*p->aiRowEst[1];
    if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0];
    *pnRow = nRowEst;
    WHERETRACE(("IN row estimate: nSpan=%d, nSingle=%d, nNotFound=%d, est=%g\n",
                 nSpan, nSingle, nNotFound, nRowEst));
  }
  sqlite3ValueFree(pVal);







|

|


|










|







 







>



>


<
>











|







 







|







 







|







 







>
|







 







|









|
|
>

|

|







 







>







 







|


|







 







|
>
|
|


>

|
|







 







|




|






|







2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
....
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462

2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
....
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
....
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
....
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
....
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
....
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
....
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
....
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
....
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
  */
  bestOrClauseIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost);
}
#endif /* SQLITE_OMIT_VIRTUALTABLE */

/*
** Argument pIdx is a pointer to an index structure that has an array of
** pIdx->nSample evenly spaced samples of the first indexed column
** stored in Index.aSample. These samples divide the domain of values stored
** the index into (pIdx->nSample+1) regions.
** Region 0 contains all values less than the first sample value. Region
** 1 contains values between the first and second samples.  Region 2 contains
** values between samples 2 and 3.  And so on.  Region pIdx->nSample
** contains values larger than the last sample.
**
** If the index contains many duplicates of a single value, then it is
** possible that two or more adjacent samples can hold the same value.
** When that is the case, the smallest possible region code is returned
** when roundUp is false and the largest possible region code is returned
** when roundUp is true.
**
** If successful, this function determines which of the regions value 
** pVal lies in, sets *piRegion to the region index (a value between 0
** and S+1, inclusive) and returns SQLITE_OK.
** Or, if an OOM occurs while converting text values between encodings,
** SQLITE_NOMEM is returned and *piRegion is undefined.
*/
#ifdef SQLITE_ENABLE_STAT2
static int whereRangeRegion(
  Parse *pParse,              /* Database connection */
  Index *pIdx,                /* Index to consider domain of */
................................................................................
  sqlite3_value *pVal,        /* Value to consider */
  int roundUp,                /* Return largest valid region if true */
  int *piRegion               /* OUT: Region of domain in which value lies */
){
  assert( roundUp==0 || roundUp==1 );
  if( ALWAYS(pVal) ){
    IndexSample *aSample = pIdx->aSample;
    int nSample = pIdx->nSample;
    int i = 0;
    int eType = sqlite3_value_type(pVal);

    assert( nSample>0 );
    if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
      double r = sqlite3_value_double(pVal);

      for(i=0; i<nSample; i++){
        if( aSample[i].eType==SQLITE_NULL ) continue;
        if( aSample[i].eType>=SQLITE_TEXT ) break;
        if( roundUp ){
          if( aSample[i].u.r>r ) break;
        }else{
          if( aSample[i].u.r>=r ) break;
        }
      }
    }else if( eType==SQLITE_NULL ){
      i = 0;
      if( roundUp ){
        while( i<nSample && aSample[i].eType==SQLITE_NULL ) i++;
      }
    }else{ 
      sqlite3 *db = pParse->db;
      CollSeq *pColl;
      const u8 *z;
      int n;

................................................................................
        if( !z ){
          return SQLITE_NOMEM;
        }
        assert( z && pColl && pColl->xCmp );
      }
      n = sqlite3ValueBytes(pVal, pColl->enc);

      for(i=0; i<nSample; i++){
        int c;
        int eSampletype = aSample[i].eType;
        if( eSampletype==SQLITE_NULL || eSampletype<eType ) continue;
        if( (eSampletype!=eType) ) break;
#ifndef SQLITE_OMIT_UTF16
        if( pColl->enc!=SQLITE_UTF8 ){
          int nSample;
................................................................................
        {
          c = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z);
        }
        if( c-roundUp>=0 ) break;
      }
    }

    assert( i>=0 && i<=pIdx->nSample );
    *piRegion = i;
  }
  return SQLITE_OK;
}
#endif   /* #ifdef SQLITE_ENABLE_STAT2 */

/*
................................................................................
#ifdef SQLITE_ENABLE_STAT2

  if( nEq==0 && p->aSample ){
    sqlite3_value *pLowerVal = 0;
    sqlite3_value *pUpperVal = 0;
    int iEst;
    int iLower = 0;
    int nSample = p->nSample;
    int iUpper = p->nSample;
    int roundUpUpper = 0;
    int roundUpLower = 0;
    u8 aff = p->pTable->aCol[p->aiColumn[0]].affinity;

    if( pLower ){
      Expr *pExpr = pLower->pExpr->pRight;
      rc = valueFromExpr(pParse, pExpr, aff, &pLowerVal);
................................................................................
      sqlite3ValueFree(pUpperVal);
      goto range_est_fallback;
    }else if( pLowerVal==0 ){
      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
      if( pLower ) iLower = iUpper/2;
    }else if( pUpperVal==0 ){
      rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
      if( pUpper ) iUpper = (iLower + p->nSample + 1)/2;
    }else{
      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
      if( rc==SQLITE_OK ){
        rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
      }
    }
    WHERETRACE(("range scan regions: %d..%d\n", iLower, iUpper));

    iEst = iUpper - iLower;
    testcase( iEst==nSample );
    assert( iEst<=nSample );
    assert( nSample>0 );
    if( iEst<1 ){
      *piEst = 50/nSample;
    }else{
      *piEst = (iEst*100)/nSample;
    }
    sqlite3ValueFree(pLowerVal);
    sqlite3ValueFree(pUpperVal);
    return rc;
  }
range_est_fallback:
#else
................................................................................
  sqlite3_value *pRhs = 0;  /* VALUE on right-hand side of pTerm */
  int iLower, iUpper;       /* Range of histogram regions containing pRhs */
  u8 aff;                   /* Column affinity */
  int rc;                   /* Subfunction return code */
  double nRowEst;           /* New estimate of the number of rows */

  assert( p->aSample!=0 );
  assert( p->nSample>0 );
  aff = p->pTable->aCol[p->aiColumn[0]].affinity;
  if( pExpr ){
    rc = valueFromExpr(pParse, pExpr, aff, &pRhs);
    if( rc ) goto whereEqualScanEst_cancel;
  }else{
    pRhs = sqlite3ValueNew(pParse->db);
  }
................................................................................
  if( pRhs==0 ) return SQLITE_NOTFOUND;
  rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
  if( rc ) goto whereEqualScanEst_cancel;
  rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
  if( rc ) goto whereEqualScanEst_cancel;
  WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper));
  if( iLower>=iUpper ){
    nRowEst = p->aiRowEst[0]/(p->nSample*3);
    if( nRowEst<*pnRow ) *pnRow = nRowEst;
  }else{
    nRowEst = (iUpper-iLower)*p->aiRowEst[0]/p->nSample;
    *pnRow = nRowEst;
  }

whereEqualScanEst_cancel:
  sqlite3ValueFree(pRhs);
  return rc;
}
................................................................................
  int iLower, iUpper;       /* Range of histogram regions containing pRhs */
  u8 aff;                   /* Column affinity */
  int rc = SQLITE_OK;       /* Subfunction return code */
  double nRowEst;           /* New estimate of the number of rows */
  int nSpan = 0;            /* Number of histogram regions spanned */
  int nSingle = 0;          /* Histogram regions hit by a single value */
  int nNotFound = 0;        /* Count of values that are not constants */
  int i;                             /* Loop counter */
  int nSample = p->nSample;          /* Number of samples */
  u8 aSpan[SQLITE_MAX_SAMPLES+1];    /* Histogram regions that are spanned */
  u8 aSingle[SQLITE_MAX_SAMPLES+1];  /* Histogram regions hit once */

  assert( p->aSample!=0 );
  assert( nSample>0 );
  aff = p->pTable->aCol[p->aiColumn[0]].affinity;
  memset(aSpan, 0, nSample+1);
  memset(aSingle, 0, nSample+1);
  for(i=0; i<pList->nExpr; i++){
    sqlite3ValueFree(pVal);
    rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal);
    if( rc ) break;
    if( pVal==0 || sqlite3_value_type(pVal)==SQLITE_NULL ){
      nNotFound++;
      continue;
................................................................................
    rc = whereRangeRegion(pParse, p, pVal, 0, &iLower);
    if( rc ) break;
    rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper);
    if( rc ) break;
    if( iLower>=iUpper ){
      aSingle[iLower] = 1;
    }else{
      assert( iLower>=0 && iUpper<=nSample );
      while( iLower<iUpper ) aSpan[iLower++] = 1;
    }
  }
  if( rc==SQLITE_OK ){
    for(i=nSpan=0; i<=nSample; i++){
      if( aSpan[i] ){
        nSpan++;
      }else if( aSingle[i] ){
        nSingle++;
      }
    }
    nRowEst = (nSpan*3+nSingle)*p->aiRowEst[0]/(3*nSample)
               + nNotFound*p->aiRowEst[1];
    if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0];
    *pnRow = nRowEst;
    WHERETRACE(("IN row estimate: nSpan=%d, nSingle=%d, nNotFound=%d, est=%g\n",
                 nSpan, nSingle, nNotFound, nRowEst));
  }
  sqlite3ValueFree(pVal);

Changes to test/analyze5.test.

124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
...
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
   47  {z>=-100 AND z<=0.0}  t1z  400
   48  {z>=-100 AND z<0.0}   t1z   50
   49  {z>=-100 AND z<=1.0}  t1z  700
   50  {z>=-100 AND z<2.0}   t1z  700
   51  {z>=-100 AND z<=2.0}  t1z  900
   52  {z>=-100 AND z<3.0}   t1z  900
  
  101  {z=-1}                t1z   50
  102  {z=0}                 t1z  400
  103  {z=1}                 t1z  300
  104  {z=2}                 t1z  200
  105  {z=3}                 t1z  100
  106  {z=4}                 t1z   50
  107  {z=-10.0}             t1z   50
  108  {z=0.0}               t1z  400
  109  {z=1.0}               t1z  300
  110  {z=2.0}               t1z  200
  111  {z=3.0}               t1z  100
  112  {z=4.0}               t1z   50
  113  {z=1.5}               t1z   50
  114  {z=2.5}               t1z   50
  
  201  {z IN (-1)}           t1z   50
  202  {z IN (0)}            t1z  400
  203  {z IN (1)}            t1z  300
  204  {z IN (2)}            t1z  200
  205  {z IN (3)}            t1z  100
  206  {z IN (4)}            t1z   50
  207  {z IN (0.5)}          t1z   50
  208  {z IN (0,1)}          t1z  700
  209  {z IN (0,1,2)}        t1z  900
  210  {z IN (0,1,2,3)}      {}   100
  211  {z IN (0,1,2,3,4,5)}  {}   100
  212  {z IN (1,2)}          t1z  500
  213  {z IN (2,3)}          t1z  300
  214  {z=3 OR z=2}          t1z  300
  215  {z IN (-1,3)}         t1z  150
  216  {z=-1 OR z=3}         t1z  150

  300  {y=0}                 {}   100
  301  {y=1}                 t1y   50
  302  {y=0.1}               t1y   50

  400  {x IS NULL}           t1x  400

} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {
    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
................................................................................
   ANALYZE;
}

# Verify that range queries generate the correct row count estimates
#
foreach {testid where index rows} {
  500  {x IS NULL AND u='charlie'}         t1u  20
  501  {x=1 AND u='charlie'}               t1x   5
  502  {x IS NULL}                          {} 100
  503  {x=1}                               t1x  50
  504  {x IS NOT NULL}                     t1x  25
  505  {+x IS NOT NULL}                     {} 500
  506  {upper(x) IS NOT NULL}               {} 500

} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {







|




|
|




|
|
|

|




|
|







|
|


|
|







 







|

|







124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
...
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
   47  {z>=-100 AND z<=0.0}  t1z  400
   48  {z>=-100 AND z<0.0}   t1z   50
   49  {z>=-100 AND z<=1.0}  t1z  700
   50  {z>=-100 AND z<2.0}   t1z  700
   51  {z>=-100 AND z<=2.0}  t1z  900
   52  {z>=-100 AND z<3.0}   t1z  900
  
  101  {z=-1}                t1z   33
  102  {z=0}                 t1z  400
  103  {z=1}                 t1z  300
  104  {z=2}                 t1z  200
  105  {z=3}                 t1z  100
  106  {z=4}                 t1z   33
  107  {z=-10.0}             t1z   33
  108  {z=0.0}               t1z  400
  109  {z=1.0}               t1z  300
  110  {z=2.0}               t1z  200
  111  {z=3.0}               t1z  100
  112  {z=4.0}               t1z   33
  113  {z=1.5}               t1z   33
  114  {z=2.5}               t1z   33
  
  201  {z IN (-1)}           t1z   33
  202  {z IN (0)}            t1z  400
  203  {z IN (1)}            t1z  300
  204  {z IN (2)}            t1z  200
  205  {z IN (3)}            t1z  100
  206  {z IN (4)}            t1z   33
  207  {z IN (0.5)}          t1z   33
  208  {z IN (0,1)}          t1z  700
  209  {z IN (0,1,2)}        t1z  900
  210  {z IN (0,1,2,3)}      {}   100
  211  {z IN (0,1,2,3,4,5)}  {}   100
  212  {z IN (1,2)}          t1z  500
  213  {z IN (2,3)}          t1z  300
  214  {z=3 OR z=2}          t1z  300
  215  {z IN (-1,3)}         t1z  133
  216  {z=-1 OR z=3}         t1z  133

  300  {y=0}                 {}   100
  301  {y=1}                 t1y   33
  302  {y=0.1}               t1y   33

  400  {x IS NULL}           t1x  400

} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {
    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
................................................................................
   ANALYZE;
}

# Verify that range queries generate the correct row count estimates
#
foreach {testid where index rows} {
  500  {x IS NULL AND u='charlie'}         t1u  20
  501  {x=1 AND u='charlie'}               t1x   3
  502  {x IS NULL}                          {} 100
  503  {x=1}                               t1x  33
  504  {x IS NOT NULL}                     t1x  25
  505  {+x IS NOT NULL}                     {} 500
  506  {upper(x) IS NOT NULL}               {} 500

} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {

Changes to test/analyze7.test.

95
96
97
98
99
100
101
102


103
104
105
106
107
108
109
110
111
    execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=2;}
  } {0 0 0 {SEARCH TABLE t1 USING INDEX t1cd (c=?) (~86 rows)}}
}
do_test analyze7-3.3 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE a=123 AND b=123}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1a (a=?) (~1 rows)}}
do_test analyze7-3.4 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=123 AND b=123}


} {0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b=?) (~2 rows)}}
do_test analyze7-3.5 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE a=123 AND c=123}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1a (a=?) (~1 rows)}}
do_test analyze7-3.6 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=123 AND d=123 AND b=123}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1cd (c=? AND d=?) (~1 rows)}}

finish_test







|
>
>
|








95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
    execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=2;}
  } {0 0 0 {SEARCH TABLE t1 USING INDEX t1cd (c=?) (~86 rows)}}
}
do_test analyze7-3.3 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE a=123 AND b=123}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1a (a=?) (~1 rows)}}
do_test analyze7-3.4 {
  set x [execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=123 AND b=123}]
  regsub -all {[bcd]+} $x {x} x
  set x
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1x (x=?) (~2 rows)}}
do_test analyze7-3.5 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE a=123 AND c=123}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1a (a=?) (~1 rows)}}
do_test analyze7-3.6 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=123 AND d=123 AND b=123}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1cd (c=? AND d=?) (~1 rows)}}

finish_test