SQLite

Check-in [4a5d9550bd]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Incremental code and comment cleanup in where.c. There is more to be done.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 4a5d9550bdc08633535a7869d7748f56ac3e9a36
User & Date: drh 2009-08-20 13:45:08.000
Context
2009-08-20
16:11
Change the code that collects samples for sqlite_stat2 so that the first sample taken is the (nRow/(2*SQLITE_INDEX_SAMPLES))th entry in the index, where nRow is the total number of index entries. (check-in: cbfe6e9df3 user: dan tags: trunk)
13:45
Incremental code and comment cleanup in where.c. There is more to be done. (check-in: 4a5d9550bd user: drh tags: trunk)
02:49
Set the "type" correctly of built-in BINARY collating sequences for UTF16. (check-in: 167644f33c user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/sqliteInt.h.
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
** If compiling for a processor that lacks floating point support,
** substitute integer for floating-point
*/
#ifdef SQLITE_OMIT_FLOATING_POINT
# define double sqlite_int64
# define LONGDOUBLE_TYPE sqlite_int64
# ifndef SQLITE_BIG_DBL
#   define SQLITE_BIG_DBL (((sqlite3_int64)1)<<60)
# endif
# define SQLITE_OMIT_DATETIME_FUNCS 1
# define SQLITE_OMIT_TRACE 1
# undef SQLITE_MIXED_ENDIAN_64BIT_FLOAT
# undef SQLITE_HAVE_ISNAN
#endif
#ifndef SQLITE_BIG_DBL







|







302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
** If compiling for a processor that lacks floating point support,
** substitute integer for floating-point
*/
#ifdef SQLITE_OMIT_FLOATING_POINT
# define double sqlite_int64
# define LONGDOUBLE_TYPE sqlite_int64
# ifndef SQLITE_BIG_DBL
#   define SQLITE_BIG_DBL (((sqlite3_int64)1)<<50)
# endif
# define SQLITE_OMIT_DATETIME_FUNCS 1
# define SQLITE_OMIT_TRACE 1
# undef SQLITE_MIXED_ENDIAN_64BIT_FLOAT
# undef SQLITE_HAVE_ISNAN
#endif
#ifndef SQLITE_BIG_DBL
Changes to src/where.c.
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929




1930
1931
1932
1933
1934
1935
1936

    if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
      double r = sqlite3_value_double(pVal);
      for(i=0; i<SQLITE_INDEX_SAMPLES; i++){
        if( aSample[i].eType==SQLITE_NULL ) continue;
        if( aSample[i].eType>=SQLITE_TEXT || aSample[i].u.r>r ) break;
      }
    }else if( eType==SQLITE_TEXT || eType==SQLITE_BLOB ){
      sqlite3 *db = pParse->db;
      CollSeq *pColl;
      const u8 *z;
      int n;




      if( eType==SQLITE_BLOB ){
        z = (const u8 *)sqlite3_value_blob(pVal);
        pColl = db->pDfltColl;
        assert( pColl->enc==SQLITE_UTF8 );
      }else{
        pColl = sqlite3GetCollSeq(db, SQLITE_UTF8, 0, *pIdx->azColl);
        if( pColl==0 ){







|




>
>
>
>







1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940

    if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
      double r = sqlite3_value_double(pVal);
      for(i=0; i<SQLITE_INDEX_SAMPLES; i++){
        if( aSample[i].eType==SQLITE_NULL ) continue;
        if( aSample[i].eType>=SQLITE_TEXT || aSample[i].u.r>r ) break;
      }
    }else{ 
      sqlite3 *db = pParse->db;
      CollSeq *pColl;
      const u8 *z;
      int n;

      /* pVal comes from sqlite3ValueFromExpr() so the type cannot be NULL */
      assert( eType==SQLITE_TEXT || eType==SQLITE_BLOB );

      if( eType==SQLITE_BLOB ){
        z = (const u8 *)sqlite3_value_blob(pVal);
        pColl = db->pDfltColl;
        assert( pColl->enc==SQLITE_UTF8 );
      }else{
        pColl = sqlite3GetCollSeq(db, SQLITE_UTF8, 0, *pIdx->azColl);
        if( pColl==0 ){
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
**
**   ... FROM t1 WHERE a > ? AND a < ? ...
**                    |_____|   |_____|
**                       |         |
**                     pLower    pUpper
**
** If the upper or lower bound is not present, then NULL should be passed in
** place of a WhereTerm.
**
** The nEq parameter is passed the index of the index column subject to the
** range constraint. Or, equivalently, the number of equality constraints
** optimized by the proposed index scan. For example, assuming index p is
** on t1(a, b), and the SQL query is:
**
**   ... FROM t1 WHERE a = ? AND b > ? AND b < ? ...







|







1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
**
**   ... FROM t1 WHERE a > ? AND a < ? ...
**                    |_____|   |_____|
**                       |         |
**                     pLower    pUpper
**
** If the upper or lower bound is not present, then NULL should be passed in
** place of the corresponding WhereTerm.
**
** The nEq parameter is passed the index of the index column subject to the
** range constraint. Or, equivalently, the number of equality constraints
** optimized by the proposed index scan. For example, assuming index p is
** on t1(a, b), and the SQL query is:
**
**   ... FROM t1 WHERE a = ? AND b > ? AND b < ? ...
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
** value of 1 indicates that the proposed range scan is expected to visit
** approximately 1/9 (11%) of the rows selected by the nEq equality constraints
** (if any). A return value of 9 indicates that it is expected that the
** range scan will visit 9/9 (100%) of the rows selected by the equality
** constraints.
*/
static int whereRangeScanEst(
  Parse *pParse,
  Index *p, 
  int nEq, 
  WhereTerm *pLower, 
  WhereTerm *pUpper,
  int *piEst                      /* OUT: Return value */
){
  int rc = SQLITE_OK;

#ifdef SQLITE_ENABLE_STAT2
  sqlite3 *db = pParse->db;
  sqlite3_value *pLowerVal = 0;
  sqlite3_value *pUpperVal = 0;







|
|
|
|
|
|







2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
** value of 1 indicates that the proposed range scan is expected to visit
** approximately 1/9 (11%) of the rows selected by the nEq equality constraints
** (if any). A return value of 9 indicates that it is expected that the
** range scan will visit 9/9 (100%) of the rows selected by the equality
** constraints.
*/
static int whereRangeScanEst(
  Parse *pParse,       /* Parsing & code generating context */
  Index *p,            /* The index containing the range-compared column; "x" */
  int nEq,             /* index into p->aCol[] of the range-compared column */
  WhereTerm *pLower,   /* Lower bound on the range. ex: "x>123" Might be NULL */
  WhereTerm *pUpper,   /* Upper bound on the range. ex: "x<455" Might be NULL */
  int *piEst           /* OUT: Return value */
){
  int rc = SQLITE_OK;

#ifdef SQLITE_ENABLE_STAT2
  sqlite3 *db = pParse->db;
  sqlite3_value *pLowerVal = 0;
  sqlite3_value *pUpperVal = 0;
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116

2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131

2132
2133
2134
2135


2136
2137
2138
2139
2140

2141
2142

2143
2144
2145




2146

2147


2148
2149
2150
2151
2152
2153
2154
2155
2156
2157

2158
2159
2160
2161
2162
2163
2164
  WhereCost *pCost            /* Lowest cost query plan */
){
  int iCur = pSrc->iCursor;   /* The cursor of the table to be accessed */
  Index *pProbe;              /* An index we are evaluating */
  Index *pIdx;                /* Copy of pProbe, or zero for IPK index */
  int eqTermMask;             /* Current mask of valid equality operators */
  int idxEqTermMask;          /* Index mask of valid equality operators */

  Index pk;
  unsigned int pkint[2] = {1000000, 1};
  int pkicol = -1;
  int wsFlagMask;


  memset(pCost, 0, sizeof(*pCost));
  pCost->rCost = SQLITE_BIG_DBL;

  /* If the pSrc table is the right table of a LEFT JOIN then we may not
  ** use an index to satisfy IS NULL constraints on that table.  This is
  ** because columns might end up being NULL if the table does not match -
  ** a circumstance which the index cannot help us discover.  Ticket #2177.
  */
  if( pSrc->jointype & JT_LEFT ){
    idxEqTermMask = WO_EQ|WO_IN;
  }else{
    idxEqTermMask = WO_EQ|WO_IN|WO_ISNULL;
  }

  if( pSrc->pIndex ){

    pIdx = pProbe = pSrc->pIndex;
    wsFlagMask = ~(WHERE_ROWID_EQ|WHERE_ROWID_RANGE);
    eqTermMask = idxEqTermMask;
  }else{


    Index *pFirst = pSrc->pTab->pIndex;
    memset(&pk, 0, sizeof(Index));
    pk.nColumn = 1;
    pk.aiColumn = &pkicol;
    pk.aiRowEst = pkint;

    pk.onError = OE_Replace;
    pk.pTable = pSrc->pTab;

    if( pSrc->notIndexed==0 ){
      pk.pNext = pFirst;
    }




    if( pFirst && pFirst->aiRowEst ){

      pkint[0] = pFirst->aiRowEst[0];


    }
    pProbe = &pk;
    wsFlagMask = ~(
        WHERE_COLUMN_IN|WHERE_COLUMN_EQ|WHERE_COLUMN_NULL|WHERE_COLUMN_RANGE
    );
    eqTermMask = WO_EQ|WO_IN;
    pIdx = 0;
  }



  for(; pProbe; pIdx=pProbe=pProbe->pNext){
    const unsigned int * const aiRowEst = pProbe->aiRowEst;
    double cost;                /* Cost of using pProbe */
    double nRow;                /* Estimated number of rows in result set */
    int rev;                    /* True to scan in reverse order */
    int wsFlags = 0;
    Bitmask used = 0;







|
<
|
|
|

>















>




>
>
|
|
|
|
|
>
|
|
>

|

>
>
>
>
|
>
|
>
>

|







|
>







2108
2109
2110
2111
2112
2113
2114
2115

2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
  WhereCost *pCost            /* Lowest cost query plan */
){
  int iCur = pSrc->iCursor;   /* The cursor of the table to be accessed */
  Index *pProbe;              /* An index we are evaluating */
  Index *pIdx;                /* Copy of pProbe, or zero for IPK index */
  int eqTermMask;             /* Current mask of valid equality operators */
  int idxEqTermMask;          /* Index mask of valid equality operators */
  Index sPk;                  /* A fake index object for the primary key */

  unsigned int aiRowEstPk[2]; /* The aiRowEst[] value for the sPk index */
  int aiColumnPk = -1;        /* The aColumn[] value for the sPk index */
  int wsFlagMask;             /* Allowed flags in pCost->plan.wsFlag */

  /* Initialize the cost to a worst-case value */
  memset(pCost, 0, sizeof(*pCost));
  pCost->rCost = SQLITE_BIG_DBL;

  /* If the pSrc table is the right table of a LEFT JOIN then we may not
  ** use an index to satisfy IS NULL constraints on that table.  This is
  ** because columns might end up being NULL if the table does not match -
  ** a circumstance which the index cannot help us discover.  Ticket #2177.
  */
  if( pSrc->jointype & JT_LEFT ){
    idxEqTermMask = WO_EQ|WO_IN;
  }else{
    idxEqTermMask = WO_EQ|WO_IN|WO_ISNULL;
  }

  if( pSrc->pIndex ){
    /* An INDEXED BY clause specifies a particular index to use */
    pIdx = pProbe = pSrc->pIndex;
    wsFlagMask = ~(WHERE_ROWID_EQ|WHERE_ROWID_RANGE);
    eqTermMask = idxEqTermMask;
  }else{
    /* There is no INDEXED BY clause.  Create a fake Index object to
    ** represent the primary key */
    Index *pFirst;                /* Any other index on the table */
    memset(&sPk, 0, sizeof(Index));
    sPk.nColumn = 1;
    sPk.aiColumn = &aiColumnPk;
    sPk.aiRowEst = aiRowEstPk;
    aiRowEstPk[1] = 1;
    sPk.onError = OE_Replace;
    sPk.pTable = pSrc->pTab;
    pFirst = pSrc->pTab->pIndex;
    if( pSrc->notIndexed==0 ){
      sPk.pNext = pFirst;
    }
    /* The aiRowEstPk[0] is an estimate of the total number of rows in the
    ** table.  Get this information from the ANALYZE information if it is
    ** available.  If not available, assume the table 1 million rows in size.
    */
    if( pFirst ){
      assert( pFirst->aiRowEst!=0 ); /* Allocated together with pFirst */
      aiRowEstPk[0] = pFirst->aiRowEst[0];
    }else{
      aiRowEstPk[0] = 1000000;
    }
    pProbe = &sPk;
    wsFlagMask = ~(
        WHERE_COLUMN_IN|WHERE_COLUMN_EQ|WHERE_COLUMN_NULL|WHERE_COLUMN_RANGE
    );
    eqTermMask = WO_EQ|WO_IN;
    pIdx = 0;
  }

  /* Loop over all indices looking for the best one to use
  */
  for(; pProbe; pIdx=pProbe=pProbe->pNext){
    const unsigned int * const aiRowEst = pProbe->aiRowEst;
    double cost;                /* Cost of using pProbe */
    double nRow;                /* Estimated number of rows in result set */
    int rev;                    /* True to scan in reverse order */
    int wsFlags = 0;
    Bitmask used = 0;
2190
2191
2192
2193
2194
2195
2196
2197

2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
    **    the sub-select is assumed to return 25 rows for the purposes of 
    **    determining nInMul.
    **
    **  bInEst:  
    **    Set to true if there was at least one "x IN (SELECT ...)" term used 
    **    in determining the value of nInMul.
    **
    **  nBound:  

    **    Set based on whether or not there is a range constraint on the 
    **    (nEq+1)th column of the index. 1 if there is neither an upper or 
    **    lower bound, 3 if there is an upper or lower bound, or 9 if there 
    **    is both an upper and lower bound.
    **
    **  bSort:   
    **    Boolean. True if there is an ORDER BY clause that will require an 
    **    external sort (i.e. scanning the index being evaluated will not 
    **    correctly order records).
    **
    **  bLookup: 







|
>
|
|
<
|







2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217

2218
2219
2220
2221
2222
2223
2224
2225
    **    the sub-select is assumed to return 25 rows for the purposes of 
    **    determining nInMul.
    **
    **  bInEst:  
    **    Set to true if there was at least one "x IN (SELECT ...)" term used 
    **    in determining the value of nInMul.
    **
    **  nBound:
    **    An estimate on the amount of the table that must be searched due
    **    to a range constraint.  The value is between 1 and 9 and indicates
    **    9ths of the table.  1 means that about 1/9th of the is searched.

    **    9 indicates that the entire table is searched.
    **
    **  bSort:   
    **    Boolean. True if there is an ORDER BY clause that will require an 
    **    external sort (i.e. scanning the index being evaluated will not 
    **    correctly order records).
    **
    **  bLookup: 
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324

2325
2326
2327
2328
2329





2330




2331
2332



2333
2334
2335





2336
2337
2338
2339

2340
2341
2342
2343
2344
2345
2346
2347



2348
2349
2350
2351
2352
2353
2354
2355
2356


2357


2358
2359
2360
2361
2362
2363
2364
      if( m==0 ){
        wsFlags |= WHERE_IDX_ONLY;
      }else{
        bLookup = 1;
      }
    }

#if 0
    if( bInEst && (nInMul*aiRowEst[nEq])>(aiRowEst[0]/2) ){
      nInMul = aiRowEst[0] / (2 * aiRowEst[nEq]);
    }
    nRow = (double)(aiRowEst[nEq] * nInMul) / nBound;
    cost = (nEq>0) * nInMul * estLog(aiRowEst[0])
         + nRow
         + bSort * nRow * estLog(nRow)
         + bLookup * nRow * estLog(aiRowEst[0]);
#else

    /* The following block calculates nRow and cost for the index scan
    ** in the same way as SQLite versions 3.6.17 and earlier. Some elements
    ** of this calculation are difficult to justify. But using this strategy
    ** works well in practice and causes the test suite to pass.  */

    nRow = (double)(aiRowEst[nEq] * nInMul);
    if( bInEst && nRow*2>aiRowEst[0] ){
      nRow = aiRowEst[0]/2;
      nInMul = nRow / aiRowEst[nEq];
    }





    cost = nRow + nInMul*estLog(aiRowEst[0]);




    nRow = nRow * (double)nBound / 9.0;
    cost = cost * (double)nBound / 9.0;



    if( bSort ){
      cost += cost*estLog(cost);
    }





    if( pIdx && bLookup==0 ){
      cost /= 2;
    }
#endif


    WHERETRACE((
      "tbl=%s idx=%s nEq=%d nInMul=%d nBound=%d bSort=%d bLookup=%d"
      " wsFlags=%d   (nRow=%.2f cost=%.2f)\n",
      pSrc->pTab->zName, (pIdx ? pIdx->zName : "ipk"), 
      nEq, nInMul, nBound, bSort, bLookup, wsFlags, nRow, cost
    ));




    if( (!pIdx || wsFlags) && cost<pCost->rCost ){
      pCost->rCost = cost;
      pCost->nRow = nRow;
      pCost->used = used;
      pCost->plan.wsFlags = (wsFlags&wsFlagMask);
      pCost->plan.nEq = nEq;
      pCost->plan.u.pIdx = pIdx;
    }



    if( pSrc->pIndex ) break;


    wsFlagMask = ~(WHERE_ROWID_EQ|WHERE_ROWID_RANGE);
    eqTermMask = idxEqTermMask;
  }

  /* If there is no ORDER BY clause and the SQLITE_ReverseOrder flag
  ** is set, then reverse the order that the index will be scanned
  ** in. This is used for application testing, to help find cases







|
|
|
<
<
<
<
<
<
<
|
<
<
<
<
>





>
>
>
>
>

>
>
>
>
|
|
>
>
>



>
>
>
>
>

|

<
>








>
>
>









>
>

>
>







2320
2321
2322
2323
2324
2325
2326
2327
2328
2329







2330




2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362

2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
      if( m==0 ){
        wsFlags |= WHERE_IDX_ONLY;
      }else{
        bLookup = 1;
      }
    }

    /**** Begin adding up the cost of using this index (Needs improvements)
    **
    ** Estimate the number of rows of output.  For an IN operator,







    ** do not let the estimate exceed half the rows in the table.




    */
    nRow = (double)(aiRowEst[nEq] * nInMul);
    if( bInEst && nRow*2>aiRowEst[0] ){
      nRow = aiRowEst[0]/2;
      nInMul = nRow / aiRowEst[nEq];
    }

    /* Assume constant cost to access a row and logarithmic cost to
    ** do a binary search.  Hence, the initial cost is the number of output
    ** rows plus log2(table-size) times the number of binary searches.
    */
    cost = nRow + nInMul*estLog(aiRowEst[0]);

    /* Adjust the number of rows and the cost downward to reflect rows
    ** that are excluded by range constraints.
    */
    nRow = nRow * (double)nBound / (double)9;
    cost = cost * (double)nBound / (double)9;

    /* Add in the estimated cost of sorting the result
    */
    if( bSort ){
      cost += cost*estLog(cost);
    }

    /* If all information can be taken directly from the index, we avoid
    ** doing table lookups.  This reduces the cost by half.  (Not really -
    ** this needs to be fixed.)
    */
    if( pIdx && bLookup==0 ){
      cost /= (double)2;
    }

    /**** Cost of using this index has now been computed ****/

    WHERETRACE((
      "tbl=%s idx=%s nEq=%d nInMul=%d nBound=%d bSort=%d bLookup=%d"
      " wsFlags=%d   (nRow=%.2f cost=%.2f)\n",
      pSrc->pTab->zName, (pIdx ? pIdx->zName : "ipk"), 
      nEq, nInMul, nBound, bSort, bLookup, wsFlags, nRow, cost
    ));

    /* If this index is the best we have seen so far, then record this
    ** index and its cost in the pCost structure.
    */
    if( (!pIdx || wsFlags) && cost<pCost->rCost ){
      pCost->rCost = cost;
      pCost->nRow = nRow;
      pCost->used = used;
      pCost->plan.wsFlags = (wsFlags&wsFlagMask);
      pCost->plan.nEq = nEq;
      pCost->plan.u.pIdx = pIdx;
    }

    /* If there was an INDEXED BY clause, then only that one index is
    ** considered. */
    if( pSrc->pIndex ) break;

    /* Reset masks for the next index in the loop */
    wsFlagMask = ~(WHERE_ROWID_EQ|WHERE_ROWID_RANGE);
    eqTermMask = idxEqTermMask;
  }

  /* If there is no ORDER BY clause and the SQLITE_ReverseOrder flag
  ** is set, then reverse the order that the index will be scanned
  ** in. This is used for application testing, to help find cases