/ Check-in [5375a3ce]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Automatically transfer terms from the HAVING clause to the WHERE clause of an aggregate query in cases where the result of evaluating the term depends only one one or more of the GROUP BY expressions (and on no other inputs).
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | having-where-optimization
Files: files | file ages | folders
SHA3-256:5375a3ce56f1d993b13b469fe33ec7679948f53940f62a15ddbaeb8aaa26a22c
User & Date: dan 2017-04-29 20:53:09
Context
2017-05-01
14:09
Add extra tests for the optimization on this branch. check-in: 4921cd95 user: dan tags: having-where-optimization
2017-04-29
20:53
Automatically transfer terms from the HAVING clause to the WHERE clause of an aggregate query in cases where the result of evaluating the term depends only one one or more of the GROUP BY expressions (and on no other inputs). check-in: 5375a3ce user: dan tags: having-where-optimization
15:27
Evaluate WHERE clause terms that reference only the index before evaluating terms that require the table, and thereby avoid seeking the table row if index terms are false. This is called the "push-down" optimization in the MySQL world, we are told. check-in: d7bb79ed user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/expr.c.

  1811   1811   ** expression must not refer to any non-deterministic function nor any
  1812   1812   ** table other than iCur.
  1813   1813   */
  1814   1814   int sqlite3ExprIsTableConstant(Expr *p, int iCur){
  1815   1815     return exprIsConst(p, 3, iCur);
  1816   1816   }
  1817   1817   
         1818  +
         1819  +/*
         1820  +** sqlite3WalkExpr() callback used by sqlite3ExprIsConstantOrGroupBy().
         1821  +*/
         1822  +static int exprNodeIsConstantOrGroupBy(Walker *pWalker, Expr *pExpr){
         1823  +  ExprList *pGroupBy = pWalker->u.pGroupBy;
         1824  +  int i;
         1825  +
         1826  +  /* Check if pExpr is identical to any GROUP BY term. If so, consider
         1827  +  ** it constant.  */
         1828  +  for(i=0; i<pGroupBy->nExpr; i++){
         1829  +    Expr *p = pGroupBy->a[i].pExpr;
         1830  +    if( sqlite3ExprCompare(pExpr, p, -1)<2 ){
         1831  +      CollSeq *pColl = sqlite3ExprCollSeq(pWalker->pParse, p);
         1832  +      if( pColl==0 || sqlite3_stricmp("BINARY", pColl->zName)==0 ){
         1833  +        return WRC_Prune;
         1834  +      }
         1835  +    }
         1836  +  }
         1837  +
         1838  +  /* Check if pExpr is a sub-select. If so, consider it variable. */
         1839  +  if( ExprHasProperty(pExpr, EP_xIsSelect) ){
         1840  +    pWalker->eCode = 0;
         1841  +    return WRC_Abort;
         1842  +  }
         1843  +
         1844  +  return exprNodeIsConstant(pWalker, pExpr);
         1845  +}
         1846  +
         1847  +/*
         1848  +** Walk the expression tree passed as the first argument. Return non-zero
         1849  +** if the expression consists entirely of constants or copies of terms 
         1850  +** in pGroupBy that sort with the BINARY collation sequence.
         1851  +*/
         1852  +int sqlite3ExprIsConstantOrGroupBy(Parse *pParse, Expr *p, ExprList *pGroupBy){
         1853  +  Walker w;
         1854  +  memset(&w, 0, sizeof(w));
         1855  +  w.eCode = 1;
         1856  +  w.xExprCallback = exprNodeIsConstantOrGroupBy;
         1857  +  w.u.pGroupBy = pGroupBy;
         1858  +  w.pParse = pParse;
         1859  +  sqlite3WalkExpr(&w, p);
         1860  +  return w.eCode;
         1861  +}
         1862  +
  1818   1863   /*
  1819   1864   ** Walk an expression tree.  Return non-zero if the expression is constant
  1820   1865   ** or a function call with constant arguments.  Return and 0 if there
  1821   1866   ** are any variables.
  1822   1867   **
  1823   1868   ** For the purposes of this function, a double-quoted string (ex: "abc")
  1824   1869   ** is considered a variable but a single-quoted string (ex: 'abc') is

Changes to src/select.c.

  4874   4874           pParse->pVdbe, OP_Explain, pParse->iSelectId, 0, 0, zEqp, P4_DYNAMIC
  4875   4875       );
  4876   4876     }
  4877   4877   }
  4878   4878   #else
  4879   4879   # define explainSimpleCount(a,b,c)
  4880   4880   #endif
         4881  +
         4882  +/*
         4883  +** Context object for havingToWhereExprCb().
         4884  +*/
         4885  +struct HavingToWhereCtx {
         4886  +  Expr **ppWhere;
         4887  +  ExprList *pGroupBy;
         4888  +};
         4889  +
         4890  +/*
         4891  +** sqlite3WalkExpr() callback used by havingToWhere().
         4892  +**
         4893  +** If the node passed to the callback is a TK_AND node, return 
         4894  +** WRC_Continue to tell sqlite3WalkExpr() to iterate through child nodes.
         4895  +**
         4896  +** Otherwise, return WRC_Prune. In this case, also check if the 
         4897  +** sub-expression matches the criteria for being moved to the WHERE
         4898  +** clause. If so, add it to the WHERE clause and replace the sub-expression
         4899  +** within the HAVING expression with a constant "1".
         4900  +*/
         4901  +static int havingToWhereExprCb(Walker *pWalker, Expr *pExpr){
         4902  +  if( pExpr->op!=TK_AND ){
         4903  +    struct HavingToWhereCtx *p = pWalker->u.pHavingCtx;
         4904  +    if( sqlite3ExprIsConstantOrGroupBy(pWalker->pParse, pExpr, p->pGroupBy) ){
         4905  +      sqlite3 *db = pWalker->pParse->db;
         4906  +      Expr *pNew = sqlite3ExprAlloc(db, TK_INTEGER, &sqlite3IntTokens[1], 0);
         4907  +      if( pNew ){
         4908  +        Expr *pWhere = *(p->ppWhere);
         4909  +        SWAP(Expr, *pNew, *pExpr);
         4910  +        if( pWhere ){
         4911  +          pNew = sqlite3ExprAnd(db, pWhere, pNew);
         4912  +        }
         4913  +        *(p->ppWhere) = pNew;
         4914  +      }
         4915  +    }
         4916  +    return WRC_Prune;
         4917  +  }
         4918  +  return WRC_Continue;
         4919  +}
         4920  +
         4921  +/*
         4922  +** Transfer eligible terms from the HAVING clause of a query, which is
         4923  +** processed after grouping, to the WHERE clause, which is processed before
         4924  +** grouping. For example, the query:
         4925  +**
         4926  +**   SELECT * FROM <tables> WHERE a=? GROUP BY b HAVING b=? AND c=?
         4927  +**
         4928  +** can be rewritten as:
         4929  +**
         4930  +**   SELECT * FROM <tables> WHERE a=? AND b=? GROUP BY b HAVING c=?
         4931  +**
         4932  +** A term of the HAVING expression is eligible for transfer if it consists
         4933  +** entirely of constants and expressions that are also GROUP BY terms that
         4934  +** use the "BINARY" collation sequence.
         4935  +*/
         4936  +static void havingToWhere(
         4937  +  Parse *pParse,
         4938  +  ExprList *pGroupBy,
         4939  +  Expr *pHaving, 
         4940  +  Expr **ppWhere
         4941  +){
         4942  +  struct HavingToWhereCtx sCtx;
         4943  +  Walker sWalker;
         4944  +
         4945  +  sCtx.ppWhere = ppWhere;
         4946  +  sCtx.pGroupBy = pGroupBy;
         4947  +
         4948  +  memset(&sWalker, 0, sizeof(sWalker));
         4949  +  sWalker.pParse = pParse;
         4950  +  sWalker.xExprCallback = havingToWhereExprCb;
         4951  +  sWalker.u.pHavingCtx = &sCtx;
         4952  +  sqlite3WalkExpr(&sWalker, pHaving);
         4953  +}
  4881   4954   
  4882   4955   /*
  4883   4956   ** Generate code for the SELECT statement given in the p argument.  
  4884   4957   **
  4885   4958   ** The results are returned according to the SelectDest structure.
  4886   4959   ** See comments in sqliteInt.h for further information.
  4887   4960   **
................................................................................
  5339   5412       sNC.pAggInfo = &sAggInfo;
  5340   5413       sAggInfo.mnReg = pParse->nMem+1;
  5341   5414       sAggInfo.nSortingColumn = pGroupBy ? pGroupBy->nExpr : 0;
  5342   5415       sAggInfo.pGroupBy = pGroupBy;
  5343   5416       sqlite3ExprAnalyzeAggList(&sNC, pEList);
  5344   5417       sqlite3ExprAnalyzeAggList(&sNC, sSort.pOrderBy);
  5345   5418       if( pHaving ){
         5419  +      if( pGroupBy ){
         5420  +        assert( pWhere==p->pWhere );
         5421  +        havingToWhere(pParse, pGroupBy, pHaving, &p->pWhere);
         5422  +        pWhere = p->pWhere;
         5423  +      }
  5346   5424         sqlite3ExprAnalyzeAggregates(&sNC, pHaving);
  5347   5425       }
  5348   5426       sAggInfo.nAccumulator = sAggInfo.nColumn;
  5349   5427       for(i=0; i<sAggInfo.nFunc; i++){
  5350   5428         assert( !ExprHasProperty(sAggInfo.aFunc[i].pExpr, EP_xIsSelect) );
  5351   5429         sNC.ncFlags |= NC_InAggFunc;
  5352   5430         sqlite3ExprAnalyzeAggList(&sNC, sAggInfo.aFunc[i].pExpr->x.pList);

Changes to src/sqliteInt.h.

  3312   3312     Parse *pParse;                            /* Parser context.  */
  3313   3313     int (*xExprCallback)(Walker*, Expr*);     /* Callback for expressions */
  3314   3314     int (*xSelectCallback)(Walker*,Select*);  /* Callback for SELECTs */
  3315   3315     void (*xSelectCallback2)(Walker*,Select*);/* Second callback for SELECTs */
  3316   3316     int walkerDepth;                          /* Number of subqueries */
  3317   3317     u8 eCode;                                 /* A small processing code */
  3318   3318     union {                                   /* Extra data for callback */
  3319         -    NameContext *pNC;                          /* Naming context */
  3320         -    int n;                                     /* A counter */
  3321         -    int iCur;                                  /* A cursor number */
  3322         -    SrcList *pSrcList;                         /* FROM clause */
  3323         -    struct SrcCount *pSrcCount;                /* Counting column references */
  3324         -    struct CCurHint *pCCurHint;                /* Used by codeCursorHint() */
  3325         -    int *aiCol;                                /* array of column indexes */
  3326         -    struct IdxCover *pIdxCover;                /* Check for index coverage */
  3327         -    struct IdxExprTrans *pIdxTrans;            /* Convert indexed expr to column */
         3319  +    NameContext *pNC;                         /* Naming context */
         3320  +    int n;                                    /* A counter */
         3321  +    int iCur;                                 /* A cursor number */
         3322  +    SrcList *pSrcList;                        /* FROM clause */
         3323  +    struct SrcCount *pSrcCount;               /* Counting column references */
         3324  +    struct CCurHint *pCCurHint;               /* Used by codeCursorHint() */
         3325  +    int *aiCol;                               /* array of column indexes */
         3326  +    struct IdxCover *pIdxCover;               /* Check for index coverage */
         3327  +    struct IdxExprTrans *pIdxTrans;           /* Convert indexed expr to column */
         3328  +    ExprList *pGroupBy;                       /* GROUP BY clause */
         3329  +    struct HavingToWhereCtx *pHavingCtx;      /* HAVING to WHERE clause ctx */
  3328   3330     } u;
  3329   3331   };
  3330   3332   
  3331   3333   /* Forward declarations */
  3332   3334   int sqlite3WalkExpr(Walker*, Expr*);
  3333   3335   int sqlite3WalkExprList(Walker*, ExprList*);
  3334   3336   int sqlite3WalkSelect(Walker*, Select*);
................................................................................
  3790   3792   void sqlite3RollbackTransaction(Parse*);
  3791   3793   void sqlite3Savepoint(Parse*, int, Token*);
  3792   3794   void sqlite3CloseSavepoints(sqlite3 *);
  3793   3795   void sqlite3LeaveMutexAndCloseZombie(sqlite3*);
  3794   3796   int sqlite3ExprIsConstant(Expr*);
  3795   3797   int sqlite3ExprIsConstantNotJoin(Expr*);
  3796   3798   int sqlite3ExprIsConstantOrFunction(Expr*, u8);
         3799  +int sqlite3ExprIsConstantOrGroupBy(Parse*, Expr*, ExprList*);
  3797   3800   int sqlite3ExprIsTableConstant(Expr*,int);
  3798   3801   #ifdef SQLITE_ENABLE_CURSOR_HINTS
  3799   3802   int sqlite3ExprContainsSubquery(Expr*);
  3800   3803   #endif
  3801   3804   int sqlite3ExprIsInteger(Expr*, int*);
  3802   3805   int sqlite3ExprCanBeNull(const Expr*);
  3803   3806   int sqlite3ExprNeedsNoAffinityChange(const Expr*, char);

Added test/having.test.

            1  +# 2017 April 30
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +#
           12  +# Test the HAVING->WHERE optimization.
           13  +#
           14  +
           15  +set testdir [file dirname $argv0]
           16  +source $testdir/tester.tcl
           17  +set testprefix having
           18  +
           19  +do_execsql_test 1.0 {
           20  +  CREATE TABLE t1(a, b);
           21  +  INSERT INTO t1 VALUES(1, 1);
           22  +  INSERT INTO t1 VALUES(2, 2);
           23  +  INSERT INTO t1 VALUES(1, 3);
           24  +  INSERT INTO t1 VALUES(2, 4);
           25  +  INSERT INTO t1 VALUES(1, 5);
           26  +  INSERT INTO t1 VALUES(2, 6);
           27  +} {}
           28  +
           29  +foreach {tn sql res} {
           30  +  1 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING a=2" {2 12}
           31  +  2 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING a=2 AND sum(b)>10" {2 12}
           32  +  3 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING sum(b)>12" {}
           33  +} {
           34  +  do_execsql_test 1.$tn $sql $res
           35  +}
           36  +
           37  +proc compare_vdbe {sql1 sql2} {
           38  +  set r1 [list]
           39  +  set r2 [list]
           40  +  db eval "explain $sql1" { lappend r1 $opcode $p1 $p2 $p3}
           41  +  db eval "explain $sql2" { lappend r2 $opcode $p1 $p2 $p3}
           42  +  return [expr {$r1==$r2}]
           43  +}
           44  +
           45  +proc do_compare_vdbe_test {tn sql1 sql2 res} {
           46  +  uplevel [list do_test $tn [list compare_vdbe $sql1 $sql2] $res]
           47  +}
           48  +
           49  +do_compare_vdbe_test 2.1 {
           50  +  SELECT a, sum(b) FROM t1 GROUP BY a HAVING a=2
           51  +} {
           52  +  SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a
           53  +} 1
           54  +do_compare_vdbe_test 2.2 {
           55  +  SELECT a, sum(b) FROM t1 GROUP BY a+1 HAVING a=2
           56  +} {
           57  +  SELECT a, sum(b) FROM t1 GROUP BY a+1 HAVING a=2
           58  +} 1
           59  +
           60  +foreach {tn sql1 sql2} {
           61  +  1 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING a=2"
           62  +    "SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a"
           63  +
           64  +  2 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING sum(b)>5 AND a=2"
           65  +    "SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a HAVING sum(b)>5"
           66  +
           67  +  3 "SELECT a, sum(b) FROM t1 GROUP BY a COLLATE binary HAVING a=2"
           68  +    "SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a COLLATE binary"
           69  +
           70  +  4 {
           71  +      SELECT x,y FROM (
           72  +        SELECT a AS x, sum(b) AS y FROM t1 
           73  +        GROUP BY a
           74  +      ) WHERE x BETWEEN 8888 AND 9999
           75  +    } {
           76  +      SELECT x,y FROM (
           77  +        SELECT a AS x, sum(b) AS y FROM t1 
           78  +        WHERE x BETWEEN 8888 AND 9999 
           79  +        GROUP BY a
           80  +      )
           81  +    }
           82  +} {
           83  +  do_compare_vdbe_test 3.$tn $sql1 $sql2 1
           84  +}
           85  +
           86  +foreach {tn sql1 sql2} {
           87  +  1 "SELECT a, sum(b) FROM t1 GROUP BY a COLLATE nocase HAVING a=2"
           88  +    "SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a COLLATE nocase"
           89  +} {
           90  +  do_compare_vdbe_test 4.$tn $sql1 $sql2 0
           91  +}
           92  +
           93  +finish_test
           94  +