/ Check-in [f7ba0219]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Experimental changes to improve optimization of DISTINCT queries.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | experimental
Files: files | file ages | folders
SHA1:f7ba0219ef2f235543c258be736955d91ca5ecce
User & Date: dan 2011-06-30 20:17:15
Context
2011-07-01
14:21
Improvements and tests for detection of redundant DISTINCT qualifiers. check-in: 7337293c user: dan tags: experimental
2011-06-30
20:17
Experimental changes to improve optimization of DISTINCT queries. check-in: f7ba0219 user: dan tags: experimental
2011-06-29
17:11
Pass the BTREE_UNORDERED hint into both sqlite3BtreeOpen() and into sqlite3BtreeCreateTable(). check-in: 591de898 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/delete.c.

   367    367       int iRowSet = ++pParse->nMem;   /* Register for rowset of rows to delete */
   368    368       int iRowid = ++pParse->nMem;    /* Used for storing rowid values. */
   369    369       int regRowid;                   /* Actual register containing rowids */
   370    370   
   371    371       /* Collect rowids of every row to be deleted.
   372    372       */
   373    373       sqlite3VdbeAddOp2(v, OP_Null, 0, iRowSet);
   374         -    pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere,0,WHERE_DUPLICATES_OK);
          374  +    pWInfo = sqlite3WhereBegin(
          375  +        pParse, pTabList, pWhere, 0, 0, WHERE_DUPLICATES_OK
          376  +    );
   375    377       if( pWInfo==0 ) goto delete_from_cleanup;
   376    378       regRowid = sqlite3ExprCodeGetColumn(pParse, pTab, -1, iCur, iRowid);
   377    379       sqlite3VdbeAddOp2(v, OP_RowSetAdd, iRowSet, regRowid);
   378    380       if( db->flags & SQLITE_CountRows ){
   379    381         sqlite3VdbeAddOp2(v, OP_AddImm, memCnt, 1);
   380    382       }
   381    383       sqlite3WhereEnd(pWInfo);

Changes to src/fkey.c.

   556    556     sNameContext.pParse = pParse;
   557    557     sqlite3ResolveExprNames(&sNameContext, pWhere);
   558    558   
   559    559     /* Create VDBE to loop through the entries in pSrc that match the WHERE
   560    560     ** clause. If the constraint is not deferred, throw an exception for
   561    561     ** each row found. Otherwise, for deferred constraints, increment the
   562    562     ** deferred constraint counter by nIncr for each row selected.  */
   563         -  pWInfo = sqlite3WhereBegin(pParse, pSrc, pWhere, 0, 0);
          563  +  pWInfo = sqlite3WhereBegin(pParse, pSrc, pWhere, 0, 0, 0);
   564    564     if( nIncr>0 && pFKey->isDeferred==0 ){
   565    565       sqlite3ParseToplevel(pParse)->mayAbort = 1;
   566    566     }
   567    567     sqlite3VdbeAddOp2(v, OP_FkCounter, pFKey->isDeferred, nIncr);
   568    568     if( pWInfo ){
   569    569       sqlite3WhereEnd(pWInfo);
   570    570     }

Changes to src/select.c.

  3717   3717     ExprList *pOrderBy;    /* The ORDER BY clause.  May be NULL */
  3718   3718     ExprList *pGroupBy;    /* The GROUP BY clause.  May be NULL */
  3719   3719     Expr *pHaving;         /* The HAVING clause.  May be NULL */
  3720   3720     int isDistinct;        /* True if the DISTINCT keyword is present */
  3721   3721     int distinct;          /* Table to use for the distinct set */
  3722   3722     int rc = 1;            /* Value to return from this function */
  3723   3723     int addrSortIndex;     /* Address of an OP_OpenEphemeral instruction */
         3724  +  int addrDistinctIndex; /* Address of an OP_OpenEphemeral instruction */
  3724   3725     AggInfo sAggInfo;      /* Information used by aggregate queries */
  3725   3726     int iEnd;              /* Address of the end of the query */
  3726   3727     sqlite3 *db;           /* The database connection */
  3727   3728   
  3728   3729   #ifndef SQLITE_OMIT_EXPLAIN
  3729   3730     int iRestoreSelectId = pParse->iSelectId;
  3730   3731     pParse->iSelectId = pParse->iNextSelectId++;
................................................................................
  3846   3847       return rc;
  3847   3848     }
  3848   3849   #endif
  3849   3850   
  3850   3851     /* If possible, rewrite the query to use GROUP BY instead of DISTINCT.
  3851   3852     ** GROUP BY might use an index, DISTINCT never does.
  3852   3853     */
         3854  +#if 0
  3853   3855     assert( p->pGroupBy==0 || (p->selFlags & SF_Aggregate)!=0 );
  3854   3856     if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct ){
  3855   3857       p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0);
  3856   3858       pGroupBy = p->pGroupBy;
  3857   3859       p->selFlags &= ~SF_Distinct;
  3858   3860     }
         3861  +#endif
  3859   3862   
  3860   3863     /* If there is both a GROUP BY and an ORDER BY clause and they are
  3861   3864     ** identical, then disable the ORDER BY clause since the GROUP BY
  3862   3865     ** will cause elements to come out in the correct order.  This is
  3863   3866     ** an optimization - the correct answer should result regardless.
  3864   3867     ** Use the SQLITE_GroupByOrder flag with SQLITE_TESTCTRL_OPTIMIZER
  3865   3868     ** to disable this optimization for testing purposes.
................................................................................
  3900   3903     p->nSelectRow = (double)LARGEST_INT64;
  3901   3904     computeLimitRegisters(pParse, p, iEnd);
  3902   3905   
  3903   3906     /* Open a virtual index to use for the distinct set.
  3904   3907     */
  3905   3908     if( p->selFlags & SF_Distinct ){
  3906   3909       KeyInfo *pKeyInfo;
  3907         -    assert( isAgg || pGroupBy );
  3908   3910       distinct = pParse->nTab++;
  3909   3911       pKeyInfo = keyInfoFromExprList(pParse, p->pEList);
  3910         -    sqlite3VdbeAddOp4(v, OP_OpenEphemeral, distinct, 0, 0,
  3911         -                        (char*)pKeyInfo, P4_KEYINFO_HANDOFF);
         3912  +    addrDistinctIndex = sqlite3VdbeAddOp4(v, OP_OpenEphemeral, distinct, 0, 0,
         3913  +        (char*)pKeyInfo, P4_KEYINFO_HANDOFF);
  3912   3914       sqlite3VdbeChangeP5(v, BTREE_UNORDERED);
  3913   3915     }else{
  3914   3916       distinct = -1;
  3915   3917     }
  3916   3918   
  3917   3919     /* Aggregate and non-aggregate queries are handled differently */
  3918   3920     if( !isAgg && pGroupBy==0 ){
  3919         -    /* This case is for non-aggregate queries
  3920         -    ** Begin the database scan
  3921         -    */
  3922         -    pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pOrderBy, 0);
         3921  +    ExprList *pDist = (isDistinct ? p->pEList : 0);
         3922  +
         3923  +    /* Begin the database scan. */
         3924  +    pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pOrderBy, pDist, 0);
  3923   3925       if( pWInfo==0 ) goto select_end;
  3924   3926       if( pWInfo->nRowOut < p->nSelectRow ) p->nSelectRow = pWInfo->nRowOut;
  3925   3927   
  3926   3928       /* If sorting index that was created by a prior OP_OpenEphemeral 
  3927   3929       ** instruction ended up not being needed, then change the OP_OpenEphemeral
  3928   3930       ** into an OP_Noop.
  3929   3931       */
  3930   3932       if( addrSortIndex>=0 && pOrderBy==0 ){
  3931   3933         sqlite3VdbeChangeToNoop(v, addrSortIndex, 1);
  3932   3934         p->addrOpenEphm[2] = -1;
  3933   3935       }
  3934   3936   
  3935         -    /* Use the standard inner loop
  3936         -    */
  3937         -    assert(!isDistinct);
  3938         -    selectInnerLoop(pParse, p, pEList, 0, 0, pOrderBy, -1, pDest,
         3937  +    if( pWInfo->eDistinct ){
         3938  +      assert( isDistinct );
         3939  +      assert( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED 
         3940  +           || pWInfo->eDistinct==WHERE_DISTINCT_UNIQUE 
         3941  +      );
         3942  +      distinct = -1;
         3943  +      if( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED ){
         3944  +        int iJump;
         3945  +        int iExpr;
         3946  +        int iFlag = ++pParse->nMem;
         3947  +        int iBase = pParse->nMem+1;
         3948  +        int iBase2 = iBase + pEList->nExpr;
         3949  +        pParse->nMem += (pEList->nExpr*2);
         3950  +        VdbeOp *pOp;
         3951  +
         3952  +        /* Change the OP_OpenEphemeral coded earlier to an OP_Integer. The
         3953  +        ** OP_Integer initializes the "first row" flag.  */
         3954  +        pOp = sqlite3VdbeGetOp(v, addrDistinctIndex);
         3955  +        pOp->opcode = OP_Integer;
         3956  +        pOp->p1 = 1;
         3957  +        pOp->p2 = iFlag;
         3958  +
         3959  +        sqlite3ExprCodeExprList(pParse, pEList, iBase, 1);
         3960  +        iJump = sqlite3VdbeCurrentAddr(v) + 1 + pEList->nExpr + 1 + 1;
         3961  +        sqlite3VdbeAddOp2(v, OP_If, iFlag, iJump-1);
         3962  +        for(iExpr=0; iExpr<pEList->nExpr; iExpr++){
         3963  +          CollSeq *pColl = sqlite3ExprCollSeq(pParse, pEList->a[iExpr].pExpr);
         3964  +          sqlite3VdbeAddOp3(v, OP_Ne, iBase+iExpr, iJump, iBase2+iExpr);
         3965  +          sqlite3VdbeChangeP4(v, -1, (const char *)pColl, P4_COLLSEQ);
         3966  +          sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
         3967  +        }
         3968  +        sqlite3VdbeAddOp2(v, OP_Goto, 0, pWInfo->iContinue);
         3969  +
         3970  +        sqlite3VdbeAddOp2(v, OP_Integer, 0, iFlag);
         3971  +        assert( sqlite3VdbeCurrentAddr(v)==iJump );
         3972  +        sqlite3VdbeAddOp3(v, OP_Move, iBase, iBase2, pEList->nExpr);
         3973  +      }
         3974  +    }
         3975  +
         3976  +    /* Use the standard inner loop. */
         3977  +    selectInnerLoop(pParse, p, pEList, 0, 0, pOrderBy, distinct, pDest,
  3939   3978                       pWInfo->iContinue, pWInfo->iBreak);
  3940   3979   
  3941   3980       /* End the database scan loop.
  3942   3981       */
  3943   3982       sqlite3WhereEnd(pWInfo);
  3944   3983     }else{
  3945   3984       /* This is the processing for aggregate queries */
................................................................................
  4041   4080   
  4042   4081         /* Begin a loop that will extract all source rows in GROUP BY order.
  4043   4082         ** This might involve two separate loops with an OP_Sort in between, or
  4044   4083         ** it might be a single loop that uses an index to extract information
  4045   4084         ** in the right order to begin with.
  4046   4085         */
  4047   4086         sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset);
  4048         -      pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pGroupBy, 0);
         4087  +      pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pGroupBy, 0, 0);
  4049   4088         if( pWInfo==0 ) goto select_end;
  4050   4089         if( pGroupBy==0 ){
  4051   4090           /* The optimizer is able to deliver rows in group by order so
  4052   4091           ** we do not have to sort.  The OP_OpenEphemeral table will be
  4053   4092           ** cancelled later because we still need to use the pKeyInfo
  4054   4093           */
  4055   4094           pGroupBy = p->pGroupBy;
................................................................................
  4303   4342           }
  4304   4343     
  4305   4344           /* This case runs if the aggregate has no GROUP BY clause.  The
  4306   4345           ** processing is much simpler since there is only a single row
  4307   4346           ** of output.
  4308   4347           */
  4309   4348           resetAccumulator(pParse, &sAggInfo);
  4310         -        pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pMinMax, flag);
         4349  +        pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pMinMax, 0, flag);
  4311   4350           if( pWInfo==0 ){
  4312   4351             sqlite3ExprListDelete(db, pDel);
  4313   4352             goto select_end;
  4314   4353           }
  4315   4354           updateAccumulator(pParse, &sAggInfo);
  4316   4355           if( !pMinMax && flag ){
  4317   4356             sqlite3VdbeAddOp2(v, OP_Goto, 0, pWInfo->iBreak);

Changes to src/sqliteInt.h.

  1962   1962   ** into the second half to give some continuity.
  1963   1963   */
  1964   1964   struct WhereInfo {
  1965   1965     Parse *pParse;       /* Parsing and code generating context */
  1966   1966     u16 wctrlFlags;      /* Flags originally passed to sqlite3WhereBegin() */
  1967   1967     u8 okOnePass;        /* Ok to use one-pass algorithm for UPDATE or DELETE */
  1968   1968     u8 untestedTerms;    /* Not all WHERE terms resolved by outer loop */
         1969  +  u8 eDistinct;
  1969   1970     SrcList *pTabList;             /* List of tables in the join */
  1970   1971     int iTop;                      /* The very beginning of the WHERE loop */
  1971   1972     int iContinue;                 /* Jump here to continue with next record */
  1972   1973     int iBreak;                    /* Jump here to break out of the loop */
  1973   1974     int nLevel;                    /* Number of nested loop */
  1974   1975     struct WhereClause *pWC;       /* Decomposition of the WHERE clause */
  1975   1976     double savedNQueryLoop;        /* pParse->nQueryLoop outside the WHERE loop */
  1976   1977     double nRowOut;                /* Estimated number of output rows */
  1977   1978     WhereLevel a[1];               /* Information about each nest loop in WHERE */
  1978   1979   };
  1979   1980   
         1981  +#define WHERE_DISTINCT_UNIQUE 1
         1982  +#define WHERE_DISTINCT_ORDERED 2
         1983  +
  1980   1984   /*
  1981   1985   ** A NameContext defines a context in which to resolve table and column
  1982   1986   ** names.  The context consists of a list of tables (the pSrcList) field and
  1983   1987   ** a list of named expression (pEList).  The named expression list may
  1984   1988   ** be NULL.  The pSrc corresponds to the FROM clause of a SELECT or
  1985   1989   ** to the table being operated on by INSERT, UPDATE, or DELETE.  The
  1986   1990   ** pEList corresponds to the result set of a SELECT and is NULL for
................................................................................
  2734   2738   int sqlite3IsReadOnly(Parse*, Table*, int);
  2735   2739   void sqlite3OpenTable(Parse*, int iCur, int iDb, Table*, int);
  2736   2740   #if defined(SQLITE_ENABLE_UPDATE_DELETE_LIMIT) && !defined(SQLITE_OMIT_SUBQUERY)
  2737   2741   Expr *sqlite3LimitWhere(Parse *, SrcList *, Expr *, ExprList *, Expr *, Expr *, char *);
  2738   2742   #endif
  2739   2743   void sqlite3DeleteFrom(Parse*, SrcList*, Expr*);
  2740   2744   void sqlite3Update(Parse*, SrcList*, ExprList*, Expr*, int);
  2741         -WhereInfo *sqlite3WhereBegin(Parse*, SrcList*, Expr*, ExprList**, u16);
         2745  +WhereInfo *sqlite3WhereBegin(Parse*, SrcList*, Expr*, ExprList**,ExprList*,u16);
  2742   2746   void sqlite3WhereEnd(WhereInfo*);
  2743   2747   int sqlite3ExprCodeGetColumn(Parse*, Table*, int, int, int);
  2744   2748   void sqlite3ExprCodeGetColumnOfTable(Vdbe*, Table*, int, int, int);
  2745   2749   void sqlite3ExprCodeMove(Parse*, int, int, int);
  2746   2750   void sqlite3ExprCodeCopy(Parse*, int, int, int);
  2747   2751   void sqlite3ExprCacheStore(Parse*, int, int, int);
  2748   2752   void sqlite3ExprCachePush(Parse*);

Changes to src/update.c.

   307    307     if( sqlite3ResolveExprNames(&sNC, pWhere) ){
   308    308       goto update_cleanup;
   309    309     }
   310    310   
   311    311     /* Begin the database scan
   312    312     */
   313    313     sqlite3VdbeAddOp2(v, OP_Null, 0, regOldRowid);
   314         -  pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere,0, WHERE_ONEPASS_DESIRED);
          314  +  pWInfo = sqlite3WhereBegin(
          315  +      pParse, pTabList, pWhere, 0, 0, WHERE_ONEPASS_DESIRED
          316  +  );
   315    317     if( pWInfo==0 ) goto update_cleanup;
   316    318     okOnePass = pWInfo->okOnePass;
   317    319   
   318    320     /* Remember the rowid of every item to be updated.
   319    321     */
   320    322     sqlite3VdbeAddOp2(v, OP_Rowid, iCur, regOldRowid);
   321    323     if( !okOnePass ){

Changes to src/where.c.

   249    249   #define WHERE_IDX_ONLY     0x00800000  /* Use index only - omit table */
   250    250   #define WHERE_ORDERBY      0x01000000  /* Output will appear in correct order */
   251    251   #define WHERE_REVERSE      0x02000000  /* Scan in reverse order */
   252    252   #define WHERE_UNIQUE       0x04000000  /* Selects no more than one row */
   253    253   #define WHERE_VIRTUALTABLE 0x08000000  /* Use virtual-table processing */
   254    254   #define WHERE_MULTI_OR     0x10000000  /* OR using multiple indices */
   255    255   #define WHERE_TEMP_INDEX   0x20000000  /* Uses an ephemeral index */
          256  +#define WHERE_DISTINCT     0x40000000  /* Correct order for DISTINCT */
   256    257   
   257    258   /*
   258    259   ** Initialize a preallocated WhereClause structure.
   259    260   */
   260    261   static void whereClauseInit(
   261    262     WhereClause *pWC,        /* The WhereClause to be initialized */
   262    263     Parse *pParse,           /* The parsing context */
................................................................................
  1429   1430   ){
  1430   1431     int i, j;                       /* Loop counters */
  1431   1432     int sortOrder = 0;              /* XOR of index and ORDER BY sort direction */
  1432   1433     int nTerm;                      /* Number of ORDER BY terms */
  1433   1434     struct ExprList_item *pTerm;    /* A term of the ORDER BY clause */
  1434   1435     sqlite3 *db = pParse->db;
  1435   1436   
  1436         -  assert( pOrderBy!=0 );
         1437  +  if( !pOrderBy ) return 0;
         1438  +  if( wsFlags & WHERE_COLUMN_IN ) return 0;
         1439  +  if( pIdx->bUnordered ) return 0;
         1440  +
  1437   1441     nTerm = pOrderBy->nExpr;
  1438   1442     assert( nTerm>0 );
  1439   1443   
  1440   1444     /* Argument pIdx must either point to a 'real' named index structure, 
  1441   1445     ** or an index structure allocated on the stack by bestBtreeIndex() to
  1442   1446     ** represent the rowid index that is part of every table.  */
  1443   1447     assert( pIdx->zName || (pIdx->nColumn==1 && pIdx->aiColumn[0]==-1) );
................................................................................
  1519   1523         ** to sort because the primary key is unique and so none of the other
  1520   1524         ** columns will make any difference
  1521   1525         */
  1522   1526         j = nTerm;
  1523   1527       }
  1524   1528     }
  1525   1529   
  1526         -  *pbRev = sortOrder!=0;
         1530  +  if( pbRev ) *pbRev = sortOrder!=0;
  1527   1531     if( j>=nTerm ){
  1528   1532       /* All terms of the ORDER BY clause are covered by this index so
  1529   1533       ** this index can be used for sorting. */
  1530   1534       return 1;
  1531   1535     }
  1532   1536     if( pIdx->onError!=OE_None && i==pIdx->nColumn
  1533   1537         && (wsFlags & WHERE_COLUMN_NULL)==0
................................................................................
  2685   2689   static void bestBtreeIndex(
  2686   2690     Parse *pParse,              /* The parsing context */
  2687   2691     WhereClause *pWC,           /* The WHERE clause */
  2688   2692     struct SrcList_item *pSrc,  /* The FROM clause term to search */
  2689   2693     Bitmask notReady,           /* Mask of cursors not available for indexing */
  2690   2694     Bitmask notValid,           /* Cursors not available for any purpose */
  2691   2695     ExprList *pOrderBy,         /* The ORDER BY clause */
         2696  +  ExprList *pDistinct,        /* The select-list if query is DISTINCT */
  2692   2697     WhereCost *pCost            /* Lowest cost query plan */
  2693   2698   ){
  2694   2699     int iCur = pSrc->iCursor;   /* The cursor of the table to be accessed */
  2695   2700     Index *pProbe;              /* An index we are evaluating */
  2696   2701     Index *pIdx;                /* Copy of pProbe, or zero for IPK index */
  2697   2702     int eqTermMask;             /* Current mask of valid equality operators */
  2698   2703     int idxEqTermMask;          /* Index mask of valid equality operators */
................................................................................
  2825   2830       **             SELECT a, b, c FROM tbl WHERE a = 1;
  2826   2831       */
  2827   2832       int nEq;                      /* Number of == or IN terms matching index */
  2828   2833       int bInEst = 0;               /* True if "x IN (SELECT...)" seen */
  2829   2834       int nInMul = 1;               /* Number of distinct equalities to lookup */
  2830   2835       int estBound = 100;           /* Estimated reduction in search space */
  2831   2836       int nBound = 0;               /* Number of range constraints seen */
  2832         -    int bSort = 0;                /* True if external sort required */
         2837  +    int bSort = !!pOrderBy;       /* True if external sort required */
         2838  +    int bDist = !!pDistinct;      /* True if index cannot help with DISTINCT */
  2833   2839       int bLookup = 0;              /* True if not a covering index */
  2834   2840       WhereTerm *pTerm;             /* A single term of the WHERE clause */
  2835   2841   #ifdef SQLITE_ENABLE_STAT2
  2836   2842       WhereTerm *pFirstTerm = 0;    /* First term matching the index */
  2837   2843   #endif
  2838   2844   
  2839   2845       /* Determine the values of nEq and nInMul */
................................................................................
  2889   2895         }
  2890   2896       }
  2891   2897   
  2892   2898       /* If there is an ORDER BY clause and the index being considered will
  2893   2899       ** naturally scan rows in the required order, set the appropriate flags
  2894   2900       ** in wsFlags. Otherwise, if there is an ORDER BY clause but the index
  2895   2901       ** will scan rows in a different order, set the bSort variable.  */
  2896         -    if( pOrderBy ){
  2897         -      if( (wsFlags & WHERE_COLUMN_IN)==0
  2898         -        && pProbe->bUnordered==0
  2899         -        && isSortingIndex(pParse, pWC->pMaskSet, pProbe, iCur, pOrderBy,
  2900         -                          nEq, wsFlags, &rev)
  2901         -      ){
  2902         -        wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY;
  2903         -        wsFlags |= (rev ? WHERE_REVERSE : 0);
  2904         -      }else{
  2905         -        bSort = 1;
  2906         -      }
         2902  +    if( isSortingIndex(
         2903  +          pParse, pWC->pMaskSet, pProbe, iCur, pOrderBy, nEq, wsFlags, &rev)
         2904  +    ){
         2905  +      bSort = 0;
         2906  +      wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY;
         2907  +      wsFlags |= (rev ? WHERE_REVERSE : 0);
         2908  +    }
         2909  +
         2910  +    /* If there is a DISTINCT qualifier and this index will scan rows in
         2911  +    ** order of the DISTINCT expressions, clear bDist and set the appropriate
         2912  +    ** flags in wsFlags. */
         2913  +    if( isSortingIndex(
         2914  +          pParse, pWC->pMaskSet, pProbe, iCur, pDistinct, nEq, wsFlags, 0)
         2915  +    ){
         2916  +      bDist = 0;
         2917  +      wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_DISTINCT;
  2907   2918       }
  2908   2919   
  2909   2920       /* If currently calculating the cost of using an index (not the IPK
  2910   2921       ** index), determine if all required column data may be obtained without 
  2911   2922       ** using the main table (i.e. if the index is a covering
  2912   2923       ** index for this query). If it is, set the WHERE_IDX_ONLY flag in
  2913   2924       ** wsFlags. Otherwise, set the bLookup variable to true.  */
................................................................................
  3016   3027       ** adds C*N*log10(N) to the cost, where N is the number of rows to be 
  3017   3028       ** sorted and C is a factor between 1.95 and 4.3.  We will split the
  3018   3029       ** difference and select C of 3.0.
  3019   3030       */
  3020   3031       if( bSort ){
  3021   3032         cost += nRow*estLog(nRow)*3;
  3022   3033       }
         3034  +    if( bDist ){
         3035  +      cost += nRow*estLog(nRow)*3;
         3036  +    }
  3023   3037   
  3024   3038       /**** Cost of using this index has now been computed ****/
  3025   3039   
  3026   3040       /* If there are additional constraints on this table that cannot
  3027   3041       ** be used with the current index, but which might lower the number
  3028   3042       ** of output rows, adjust the nRow value accordingly.  This only 
  3029   3043       ** matters if the current index is the least costly, so do not bother
................................................................................
  3161   3175       if( p->needToFreeIdxStr ){
  3162   3176         sqlite3_free(p->idxStr);
  3163   3177       }
  3164   3178       sqlite3DbFree(pParse->db, p);
  3165   3179     }else
  3166   3180   #endif
  3167   3181     {
  3168         -    bestBtreeIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost);
         3182  +    bestBtreeIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, 0, pCost);
  3169   3183     }
  3170   3184   }
  3171   3185   
  3172   3186   /*
  3173   3187   ** Disable a term in the WHERE clause.  Except, do not disable the term
  3174   3188   ** if it controls a LEFT OUTER JOIN and it did not originate in the ON
  3175   3189   ** or USING clause of that join.
................................................................................
  4123   4137       iRetInit = sqlite3VdbeAddOp2(v, OP_Integer, 0, regReturn);
  4124   4138   
  4125   4139       for(ii=0; ii<pOrWc->nTerm; ii++){
  4126   4140         WhereTerm *pOrTerm = &pOrWc->a[ii];
  4127   4141         if( pOrTerm->leftCursor==iCur || pOrTerm->eOperator==WO_AND ){
  4128   4142           WhereInfo *pSubWInfo;          /* Info for single OR-term scan */
  4129   4143           /* Loop through table entries that match term pOrTerm. */
  4130         -        pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrTerm->pExpr, 0,
         4144  +        pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrTerm->pExpr, 0, 0,
  4131   4145                           WHERE_OMIT_OPEN | WHERE_OMIT_CLOSE |
  4132   4146                           WHERE_FORCE_TABLE | WHERE_ONETABLE_ONLY);
  4133   4147           if( pSubWInfo ){
  4134   4148             explainOneScan(
  4135   4149                 pParse, pOrTab, &pSubWInfo->a[0], iLevel, pLevel->iFrom, 0
  4136   4150             );
  4137   4151             if( (wctrlFlags & WHERE_DUPLICATES_OK)==0 ){
................................................................................
  4270   4284         }
  4271   4285       }
  4272   4286       whereClauseClear(pWInfo->pWC);
  4273   4287       sqlite3DbFree(db, pWInfo);
  4274   4288     }
  4275   4289   }
  4276   4290   
         4291  +/*
         4292  +** Return true if the DISTINCT expression-list passed as the third argument
         4293  +** is redundant. A DISTINCT list is redundant if the database contains a
         4294  +** UNIQUE index that guarantees that the result of the query will be distinct
         4295  +** anyway.
         4296  +*/
         4297  +static int whereDistinctRedundant(
         4298  +  Parse *pParse,
         4299  +  SrcList *pTabList,
         4300  +  WhereClause *pWC,
         4301  +  ExprList *pDistinct
         4302  +){
         4303  +  Table *pTab;
         4304  +  Index *pIdx;
         4305  +  int i;
         4306  +  int iBase;
         4307  +
         4308  +  /* If there is more than one table or sub-select in the FROM clause of
         4309  +  ** this query, then it will not be possible to show that the DISTINCT 
         4310  +  ** clause is redundant. */
         4311  +  if( pTabList->nSrc!=1 ) return 0;
         4312  +  iBase = pTabList->a[0].iCursor;
         4313  +  pTab = pTabList->a[0].pTab;
         4314  +
         4315  +  /* Check if all the expressions in the ExprList are of type TK_COLUMN and
         4316  +  ** on the same table. If this is not the case, return early, since it will 
         4317  +  ** not be possible to prove that the DISTINCT qualifier is redundant. 
         4318  +  ** If any of the expressions is an IPK column, then return true.
         4319  +  */
         4320  +  for(i=0; i<pDistinct->nExpr; i++){
         4321  +    Expr *p = pDistinct->a[i].pExpr;
         4322  +    if( p->op!=TK_COLUMN || p->iTable!=iBase ) return 0;
         4323  +    if( p->iColumn<0 ) return 1;
         4324  +  }
         4325  +
         4326  +  /* Loop through all indices on the table, checking each to see if it makes
         4327  +  ** the DISTINCT qualifier redundant. It does so if:
         4328  +  **
         4329  +  **   1. The index is itself UNIQUE, and
         4330  +  **
         4331  +  **   2. All of the columns in the index are either part of the pDistinct
         4332  +  **      list, or else the WHERE clause contains a term of the form "col=X",
         4333  +  **      where X is a constant value. The collation sequences of the
         4334  +  **      comparison and select-list expressions must match those of the index.
         4335  +  */
         4336  +  for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
         4337  +    if( pIdx->onError==OE_None ) continue;
         4338  +    for(i=0; i<pIdx->nColumn; i++){
         4339  +      int iCol = pIdx->aiColumn[i];
         4340  +      const char *zColl = pIdx->azColl[i];
         4341  +
         4342  +      if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) ){
         4343  +        int j;
         4344  +        for(j=0; j<pDistinct->nExpr; j++){
         4345  +          Expr *p = pDistinct->a[j].pExpr;
         4346  +          if( p->iColumn==iCol ){
         4347  +            CollSeq *pColl = sqlite3ExprCollSeq(pParse, p);
         4348  +            const char *zEColl = (pColl ? pColl : pParse->db->pDfltColl)->zName;
         4349  +            if( 0==sqlite3StrICmp(zColl, zEColl) ) break;
         4350  +          }
         4351  +        }
         4352  +        if( j==pDistinct->nExpr ) break;
         4353  +      }
         4354  +    }
         4355  +    if( i==pIdx->nColumn ){
         4356  +      /* This index implies that the DISTINCT qualifier is redundant. */
         4357  +      return 1;
         4358  +    }
         4359  +  }
         4360  +
         4361  +  return 0;
         4362  +}
         4363  +
  4277   4364   
  4278   4365   /*
  4279   4366   ** Generate the beginning of the loop used for WHERE clause processing.
  4280   4367   ** The return value is a pointer to an opaque structure that contains
  4281   4368   ** information needed to terminate the loop.  Later, the calling routine
  4282   4369   ** should invoke sqlite3WhereEnd() with the return value of this function
  4283   4370   ** in order to complete the WHERE clause processing.
................................................................................
  4364   4451   ** output order, then the *ppOrderBy is unchanged.
  4365   4452   */
  4366   4453   WhereInfo *sqlite3WhereBegin(
  4367   4454     Parse *pParse,        /* The parser context */
  4368   4455     SrcList *pTabList,    /* A list of all tables to be scanned */
  4369   4456     Expr *pWhere,         /* The WHERE clause */
  4370   4457     ExprList **ppOrderBy, /* An ORDER BY clause, or NULL */
         4458  +  ExprList *pDistinct,  /* The select-list for DISTINCT queries - or NULL */
  4371   4459     u16 wctrlFlags        /* One of the WHERE_* flags defined in sqliteInt.h */
  4372   4460   ){
  4373   4461     int i;                     /* Loop counter */
  4374   4462     int nByteWInfo;            /* Num. bytes allocated for WhereInfo struct */
  4375   4463     int nTabList;              /* Number of elements in pTabList */
  4376   4464     WhereInfo *pWInfo;         /* Will become the return value of this function */
  4377   4465     Vdbe *v = pParse->pVdbe;   /* The virtual database engine */
................................................................................
  4490   4578     ** want to analyze these virtual terms, so start analyzing at the end
  4491   4579     ** and work forward so that the added virtual terms are never processed.
  4492   4580     */
  4493   4581     exprAnalyzeAll(pTabList, pWC);
  4494   4582     if( db->mallocFailed ){
  4495   4583       goto whereBeginError;
  4496   4584     }
         4585  +
         4586  +  /* Check if the DISTINCT qualifier, if there is one, is redundant. 
         4587  +  ** If it is, then set pDistinct to NULL and WhereInfo.eDistinct to
         4588  +  ** WHERE_DISTINCT_UNIQUE to tell the caller to ignore the DISTINCT.
         4589  +  */
         4590  +  if( pDistinct && whereDistinctRedundant(pParse, pTabList, pWC, pDistinct) ){
         4591  +    pDistinct = 0;
         4592  +    pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE;
         4593  +  }
  4497   4594   
  4498   4595     /* Chose the best index to use for each table in the FROM clause.
  4499   4596     **
  4500   4597     ** This loop fills in the following fields:
  4501   4598     **
  4502   4599     **   pWInfo->a[].pIdx      The index to use for this level of the loop.
  4503   4600     **   pWInfo->a[].wsFlags   WHERE_xxx flags associated with pIdx
................................................................................
  4574   4671       notIndexed = 0;
  4575   4672       for(isOptimal=(iFrom<nTabList-1); isOptimal>=0 && bestJ<0; isOptimal--){
  4576   4673         Bitmask mask;             /* Mask of tables not yet ready */
  4577   4674         for(j=iFrom, pTabItem=&pTabList->a[j]; j<nTabList; j++, pTabItem++){
  4578   4675           int doNotReorder;    /* True if this table should not be reordered */
  4579   4676           WhereCost sCost;     /* Cost information from best[Virtual]Index() */
  4580   4677           ExprList *pOrderBy;  /* ORDER BY clause for index to optimize */
         4678  +        ExprList *pDist;     /* DISTINCT clause for index to optimize */
  4581   4679     
  4582   4680           doNotReorder =  (pTabItem->jointype & (JT_LEFT|JT_CROSS))!=0;
  4583   4681           if( j!=iFrom && doNotReorder ) break;
  4584   4682           m = getMask(pMaskSet, pTabItem->iCursor);
  4585   4683           if( (m & notReady)==0 ){
  4586   4684             if( j==iFrom ) iFrom++;
  4587   4685             continue;
  4588   4686           }
  4589   4687           mask = (isOptimal ? m : notReady);
  4590   4688           pOrderBy = ((i==0 && ppOrderBy )?*ppOrderBy:0);
         4689  +        pDist = (i==0 ? pDistinct : 0);
  4591   4690           if( pTabItem->pIndex==0 ) nUnconstrained++;
  4592   4691     
  4593   4692           WHERETRACE(("=== trying table %d with isOptimal=%d ===\n",
  4594   4693                       j, isOptimal));
  4595   4694           assert( pTabItem->pTab );
  4596   4695   #ifndef SQLITE_OMIT_VIRTUALTABLE
  4597   4696           if( IsVirtual(pTabItem->pTab) ){
................................................................................
  4598   4697             sqlite3_index_info **pp = &pWInfo->a[j].pIdxInfo;
  4599   4698             bestVirtualIndex(pParse, pWC, pTabItem, mask, notReady, pOrderBy,
  4600   4699                              &sCost, pp);
  4601   4700           }else 
  4602   4701   #endif
  4603   4702           {
  4604   4703             bestBtreeIndex(pParse, pWC, pTabItem, mask, notReady, pOrderBy,
  4605         -                         &sCost);
         4704  +              pDist, &sCost);
  4606   4705           }
  4607   4706           assert( isOptimal || (sCost.used&notReady)==0 );
  4608   4707   
  4609   4708           /* If an INDEXED BY clause is present, then the plan must use that
  4610   4709           ** index if it uses any index at all */
  4611   4710           assert( pTabItem->pIndex==0 
  4612   4711                     || (sCost.plan.wsFlags & WHERE_NOT_FULLSCAN)==0
................................................................................
  4658   4757       assert( bestJ>=0 );
  4659   4758       assert( notReady & getMask(pMaskSet, pTabList->a[bestJ].iCursor) );
  4660   4759       WHERETRACE(("*** Optimizer selects table %d for loop %d"
  4661   4760                   " with cost=%g and nRow=%g\n",
  4662   4761                   bestJ, pLevel-pWInfo->a, bestPlan.rCost, bestPlan.plan.nRow));
  4663   4762       if( (bestPlan.plan.wsFlags & WHERE_ORDERBY)!=0 ){
  4664   4763         *ppOrderBy = 0;
         4764  +    }
         4765  +    if( (bestPlan.plan.wsFlags & WHERE_DISTINCT)!=0 ){
         4766  +      assert( pWInfo->eDistinct==0 );
         4767  +      pWInfo->eDistinct = WHERE_DISTINCT_ORDERED;
  4665   4768       }
  4666   4769       andFlags &= bestPlan.plan.wsFlags;
  4667   4770       pLevel->plan = bestPlan.plan;
  4668   4771       testcase( bestPlan.plan.wsFlags & WHERE_INDEXED );
  4669   4772       testcase( bestPlan.plan.wsFlags & WHERE_TEMP_INDEX );
  4670   4773       if( bestPlan.plan.wsFlags & (WHERE_INDEXED|WHERE_TEMP_INDEX) ){
  4671   4774         pLevel->iIdxCur = pParse->nTab++;

Changes to test/collate5.test.

    53     53       INSERT INTO collate5t1 VALUES('N', NULL);
    54     54     } 
    55     55   } {}
    56     56   do_test collate5-1.1 {
    57     57     execsql {
    58     58       SELECT DISTINCT a FROM collate5t1;
    59     59     }
    60         -} {A B N}
           60  +} {a b n}
    61     61   do_test collate5-1.2 {
    62     62     execsql {
    63     63       SELECT DISTINCT b FROM collate5t1;
    64     64     }
    65         -} {{} Apple apple banana}
           65  +} {apple Apple banana {}}
    66     66   do_test collate5-1.3 {
    67     67     execsql {
    68     68       SELECT DISTINCT a, b FROM collate5t1;
    69     69     }
    70         -} {A Apple a apple B banana N {}}
           70  +} {a apple A Apple b banana n {}}
    71     71   
    72     72   # Ticket #3376
    73     73   #
    74     74   do_test collate5-1.11 {
    75     75     execsql {
    76     76       CREATE TABLE tkt3376(a COLLATE nocase PRIMARY KEY);
    77     77       INSERT INTO tkt3376 VALUES('abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz');

Changes to test/e_select.test.

  1234   1234   do_select_tests e_select-5 {
  1235   1235     3.1 "SELECT ALL x FROM h2" {One Two Three Four one two three four}
  1236   1236     3.2 "SELECT ALL x FROM h1, h2 ON (x=b)" {One one Four four}
  1237   1237   
  1238   1238     3.1 "SELECT x FROM h2" {One Two Three Four one two three four}
  1239   1239     3.2 "SELECT x FROM h1, h2 ON (x=b)" {One one Four four}
  1240   1240   
  1241         -  4.1 "SELECT DISTINCT x FROM h2" {four one three two}
  1242         -  4.2 "SELECT DISTINCT x FROM h1, h2 ON (x=b)" {four one}
         1241  +  4.1 "SELECT DISTINCT x FROM h2" {One Two Three Four}
         1242  +  4.2 "SELECT DISTINCT x FROM h1, h2 ON (x=b)" {One Four}
  1243   1243   } 
  1244   1244   
  1245   1245   # EVIDENCE-OF: R-02054-15343 For the purposes of detecting duplicate
  1246   1246   # rows, two NULL values are considered to be equal.
  1247   1247   #
  1248   1248   do_select_tests e_select-5.5 {
  1249   1249     1  "SELECT DISTINCT d FROM h3" {{} 2 2,3 2,4 3}
  1250   1250   }
  1251   1251   
  1252   1252   # EVIDENCE-OF: R-58359-52112 The normal rules for selecting a collation
  1253   1253   # sequence to compare text values with apply.
  1254   1254   #
  1255   1255   do_select_tests e_select-5.6 {
  1256         -  1  "SELECT DISTINCT b FROM h1"                  {I IV four i iv one}
  1257         -  2  "SELECT DISTINCT b COLLATE nocase FROM h1"   {four i iv one}
  1258         -  3  "SELECT DISTINCT x FROM h2"                  {four one three two}
         1256  +  1  "SELECT DISTINCT b FROM h1"                  {one I i four IV iv}
         1257  +  2  "SELECT DISTINCT b COLLATE nocase FROM h1"   {one I four IV}
         1258  +  3  "SELECT DISTINCT x FROM h2"                  {One Two Three Four}
  1259   1259     4  "SELECT DISTINCT x COLLATE binary FROM h2"   {
  1260         -    Four One Three Two four one three two
         1260  +    One Two Three Four one two three four
  1261   1261     }
  1262   1262   }
  1263   1263   
  1264   1264   #-------------------------------------------------------------------------
  1265   1265   # The following tests - e_select-7.* - test that statements made to do
  1266   1266   # with compound SELECT statements are correct.
  1267   1267   #

Changes to test/fuzzer1.test.

  1372   1372   do_test fuzzer1-2.3 {
  1373   1373     execsql {
  1374   1374       SELECT DISTINCT streetname.n FROM f2, streetname
  1375   1375        WHERE f2.word MATCH 'tayle'
  1376   1376          AND f2.distance<=200
  1377   1377          AND streetname.n>=f2.word AND streetname.n<=(f2.word || x'F7BFBFBF')
  1378   1378     }
  1379         -} {steelewood tallia tallu talwyn taymouth thelema trailer {tyler finley}}
         1379  +} {{tyler finley} trailer taymouth steelewood tallia tallu talwyn thelema}
  1380   1380   
  1381   1381   
  1382   1382   finish_test

Changes to test/insert4.test.

   108    108   #
   109    109   do_test insert4-2.4.1 {
   110    110     execsql {
   111    111       DELETE FROM t3;
   112    112       INSERT INTO t3 SELECT DISTINCT * FROM t2;
   113    113       SELECT * FROM t3;
   114    114     }
   115         -} {1 9 9 1}
          115  +} {9 1 1 9}
   116    116   xferopt_test insert4-2.4.2 0
   117    117   do_test insert4-2.4.3 {
   118    118     catchsql {
   119    119       DELETE FROM t1;
   120    120       INSERT INTO t1 SELECT DISTINCT * FROM t2;
   121    121     }
   122    122   } {1 {constraint failed}}

Changes to test/misc5.test.

   501    501             )    
   502    502             WHERE artist <> '' 
   503    503           )  
   504    504          )       
   505    505         )  
   506    506         ORDER BY LOWER(artist) ASC;
   507    507       }
   508         -  } {one}
          508  +  } {two}
   509    509   }
   510    510   
   511    511   # Ticket #1370.  Do not overwrite small files (less than 1024 bytes)
   512    512   # when trying to open them as a database.
   513    513   #
   514    514   if {[permutation] == ""} {
   515    515     do_test misc5-4.1 {

Changes to test/selectB.test.

   351    351   
   352    352     do_test selectB-$ii.19 {
   353    353       execsql {
   354    354         SELECT * FROM (
   355    355           SELECT DISTINCT (a/10) FROM t1 UNION ALL SELECT DISTINCT(d%2) FROM t2
   356    356         )
   357    357       }
   358         -  } {0 1 0 1}
          358  +  } {0 1 1 0}
   359    359   
   360    360     do_test selectB-$ii.20 {
   361    361       execsql {
   362    362         SELECT DISTINCT * FROM (
   363    363           SELECT DISTINCT (a/10) FROM t1 UNION ALL SELECT DISTINCT(d%2) FROM t2
   364    364         )
   365    365       }