/ Check-in [fe7081e0]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix some problems with using window-functions in aggregate queries.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | exp-window-functions
Files: files | file ages | folders
SHA3-256:fe7081e0952950f577234fcbb58f3c1efa4579267654fd2f713dc4804e470e7e
User & Date: dan 2018-06-12 18:40:17
Context
2018-06-12
20:53
Fix another issue to do with window-functions in aggregate queries. check-in: 6413e38a user: dan tags: exp-window-functions
18:40
Fix some problems with using window-functions in aggregate queries. check-in: fe7081e0 user: dan tags: exp-window-functions
2018-06-11
20:50
Clarify the relationship between a Window object and its associated Expr. check-in: 0cd55e98 user: dan tags: exp-window-functions
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/select.c.

  5460   5460     sqlite3SelectPrep(pParse, p, 0);
  5461   5461     memset(&sSort, 0, sizeof(sSort));
  5462   5462     sSort.pOrderBy = p->pOrderBy;
  5463   5463     if( pParse->nErr || db->mallocFailed ){
  5464   5464       goto select_end;
  5465   5465     }
  5466   5466     assert( p->pEList!=0 );
  5467         -  isAgg = (p->selFlags & SF_Aggregate)!=0;
  5468   5467   #if SELECTTRACE_ENABLED
  5469   5468     if( sqlite3SelectTrace & 0x104 ){
  5470   5469       SELECTTRACE(0x104,pParse,p, ("after name resolution:\n"));
  5471   5470       sqlite3TreeViewSelect(0, p, 0);
  5472   5471     }
  5473   5472   #endif
  5474   5473   
................................................................................
  5482   5481   #if SELECTTRACE_ENABLED
  5483   5482     if( sqlite3SelectTrace & 0x108 ){
  5484   5483       SELECTTRACE(0x104,pParse,p, ("after window rewrite:\n"));
  5485   5484       sqlite3TreeViewSelect(0, p, 0);
  5486   5485     }
  5487   5486   #endif
  5488   5487     pTabList = p->pSrc;
         5488  +  isAgg = (p->selFlags & SF_Aggregate)!=0;
  5489   5489   
  5490   5490     /* Try to various optimizations (flattening subqueries, and strength
  5491   5491     ** reduction of join operators) in the FROM clause up into the main query
  5492   5492     */
  5493   5493   #if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW)
  5494   5494     for(i=0; !p->pPrior && i<pTabList->nSrc; i++){
  5495   5495       struct SrcList_item *pItem = &pTabList->a[i];

Changes to src/window.c.

     8      8   **    May you find forgiveness for yourself and forgive others.
     9      9   **    May you share freely, never taking more than you give.
    10     10   **
    11     11   *************************************************************************
    12     12   */
    13     13   #include "sqliteInt.h"
    14     14   
           15  +/*
           16  +** SELECT REWRITING
           17  +**
           18  +**   Any SELECT statement that contains one or more window functions in
           19  +**   either the select list or ORDER BY clause (the only two places window
           20  +**   functions may be used) is transformed by function sqlite3WindowRewrite()
           21  +**   in order to support window function processing. For example, with the
           22  +**   schema:
           23  +**
           24  +**     CREATE TABLE t1(a, b, c, d, e, f, g);
           25  +**
           26  +**   the statement:
           27  +**
           28  +**     SELECT a+1, max(b) OVER (PARTITION BY c ORDER BY d) FROM t1 ORDER BY e;
           29  +**
           30  +**   is transformed to:
           31  +**
           32  +**     SELECT a+1, max(b) OVER (PARTITION BY c ORDER BY d) FROM (
           33  +**         SELECT a, e, c, d, b FROM t1 ORDER BY c, d
           34  +**     ) ORDER BY e;
           35  +**
           36  +**   The flattening optimization is disabled when processing this transformed
           37  +**   SELECT statement. This allows the implementation of the window function
           38  +**   (in this case max()) to process rows sorted in order of (c, d), which
           39  +**   makes things easier for obvious reasons. More generally:
           40  +**
           41  +**     * FROM, WHERE, GROUP BY and HAVING clauses are all moved to 
           42  +**       the sub-query.
           43  +**
           44  +**     * ORDER BY, LIMIT and OFFSET remain part of the parent query.
           45  +**
           46  +**     * Terminals from each of the expression trees that make up the 
           47  +**       select-list and ORDER BY expressions in the parent query are
           48  +**       selected by the sub-query. For the purposes of the transformation,
           49  +**       terminals are column references and aggregate functions.
           50  +**
           51  +**   If there is more than one window function in the SELECT that uses
           52  +**   the same window declaration (the OVER bit), then a single scan may
           53  +**   be used to process more than one window function. For example:
           54  +**
           55  +**     SELECT max(b) OVER (PARTITION BY c ORDER BY d), 
           56  +**            min(e) OVER (PARTITION BY c ORDER BY d) 
           57  +**     FROM t1;
           58  +**
           59  +**   is transformed in the same way as the example above. However:
           60  +**
           61  +**     SELECT max(b) OVER (PARTITION BY c ORDER BY d), 
           62  +**            min(e) OVER (PARTITION BY a ORDER BY b) 
           63  +**     FROM t1;
           64  +**
           65  +**   Must be transformed to:
           66  +**
           67  +**     SELECT max(b) OVER (PARTITION BY c ORDER BY d) FROM (
           68  +**         SELECT e, min(e) OVER (PARTITION BY a ORDER BY b), c, d, b FROM
           69  +**           SELECT a, e, c, d, b FROM t1 ORDER BY a, b
           70  +**         ) ORDER BY c, d
           71  +**     ) ORDER BY e;
           72  +**
           73  +**   so that both min() and max() may process rows in the order defined by
           74  +**   their respective window declarations.
           75  +**
           76  +** INTERFACE WITH SELECT.C
           77  +**
           78  +**   When processing the rewritten SELECT statement, code in select.c calls
           79  +**   sqlite3WhereBegin() to begin iterating through the results of the
           80  +**   sub-query, which is always implemented as a co-routine. It then calls
           81  +**   sqlite3WindowCodeStep() to process rows and finish the scan by calling
           82  +**   sqlite3WhereEnd().
           83  +**
           84  +**   sqlite3WindowCodeStep() generates VM code so that, for each row returned
           85  +**   by the sub-query a sub-routine (OP_Gosub) coded by select.c is invoked.
           86  +**   When the sub-routine is invoked:
           87  +**
           88  +**     * The results of all window-functions for the row are stored
           89  +**       in the associated Window.regResult registers.
           90  +**
           91  +**     * The required terminal values are stored in the current row of
           92  +**       temp table Window.iEphCsr.
           93  +**
           94  +**   In some cases, depending on the window frame and the specific window
           95  +**   functions invoked, sqlite3WindowCodeStep() caches each entire partition
           96  +**   in a temp table before returning any rows. In other cases it does not.
           97  +**   This detail is encapsulated within this file, the code generated by
           98  +**   select.c is the same in either case.
           99  +**
          100  +** BUILT-IN WINDOW FUNCTIONS
          101  +**
          102  +**   This implementation features the following built-in window functions:
          103  +**
          104  +**     row_number()
          105  +**     rank()
          106  +**     dense_rank()
          107  +**     percent_rank()
          108  +**     cume_dist()
          109  +**     ntile(N)
          110  +**     lead(expr [, offset [, default]])
          111  +**     lag(expr [, offset [, default]])
          112  +**     first_value(expr)
          113  +**     last_value(expr)
          114  +**     nth_value(expr, N)
          115  +**   
          116  +**   These are the same built-in window functions supported by Postgres. 
          117  +**   Although the behaviour of aggregate window functions (functions that
          118  +**   can be used as either aggregates or window funtions) allows them to
          119  +**   be implemented using an API, built-in window functions are much more
          120  +**   esoteric. Additionally, some window functions (e.g. nth_value()) 
          121  +**   may only be implemented by caching the entire partition in memory.
          122  +**   As such, some built-in window functions use the same API as aggregate
          123  +**   window functions and some are implemented directly using VDBE 
          124  +**   instructions. Additionally, for those functions that use the API, the
          125  +**   window frame is sometimes modified before the SELECT statement is
          126  +**   rewritten. For example, regardless of the specified window frame, the
          127  +**   row_number() function always uses:
          128  +**
          129  +**     ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
          130  +**
          131  +**   See sqlite3WindowUpdate() for details.
          132  +*/
          133  +
    15    134   /*
    16    135   ** Implementation of built-in window function row_number(). Assumes that the
    17    136   ** window frame has been coerced to:
    18    137   **
    19    138   **   ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    20    139   */
    21    140   static void row_numberStepFunc(
................................................................................
   430    549               assert( pWin->pOwner==pExpr );
   431    550               return WRC_Prune;
   432    551             }
   433    552           }
   434    553         }
   435    554         /* Fall through.  */
   436    555   
          556  +    case TK_AGG_FUNCTION:
   437    557       case TK_COLUMN: {
   438    558         Expr *pDup = sqlite3ExprDup(pParse->db, pExpr, 0);
   439    559         p->pSub = sqlite3ExprListAppend(pParse, p->pSub, pDup);
   440    560         if( p->pSub ){
   441    561           assert( ExprHasProperty(pExpr, EP_Static)==0 );
   442    562           ExprSetProperty(pExpr, EP_Static);
   443    563           sqlite3ExprDelete(pParse->db, pExpr);
................................................................................
   591    711         ExprList *pList = 0;
   592    712         p->pSrc->a[0].pSelect = pSub;
   593    713         sqlite3SrcListAssignCursors(pParse, p->pSrc);
   594    714         if( sqlite3ExpandSubquery(pParse, &p->pSrc->a[0]) ){
   595    715           rc = SQLITE_NOMEM;
   596    716         }else{
   597    717           pSub->selFlags |= SF_Expanded;
          718  +        p->selFlags &= ~SF_Aggregate;
          719  +        sqlite3SelectPrep(pParse, pSub, 0);
   598    720         }
   599    721       }
   600    722   
   601    723       sqlite3VdbeAddOp2(v, OP_OpenEphemeral, pMWin->iEphCsr, pSublist->nExpr);
   602    724     }
   603    725   
   604    726     return rc;

Changes to test/window4.tcl.

   123    123     SELECT a, max(c) OVER (ORDER BY a ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING)
   124    124     FROM t5
   125    125   }
   126    126   execsql_test 3.6.3 {
   127    127     SELECT a, max(c) OVER (ORDER BY a ROWS BETWEEN 0 FOLLOWING AND 0 FOLLOWING)
   128    128     FROM t5
   129    129   }
          130  +
          131  +#=========================================================================
          132  +execsql_test 4.0 {
          133  +  DROP TABLE IF EXISTS ttt;
          134  +  CREATE TABLE ttt(a INTEGER PRIMARY KEY, b INTEGER, c INTEGER);
          135  +  INSERT INTO ttt VALUES(1, 1, 1);
          136  +  INSERT INTO ttt VALUES(2, 2, 2);
          137  +  INSERT INTO ttt VALUES(3, 3, 3);
          138  +
          139  +  INSERT INTO ttt VALUES(4, 1, 2);
          140  +  INSERT INTO ttt VALUES(5, 2, 3);
          141  +  INSERT INTO ttt VALUES(6, 3, 4);
          142  +
          143  +  INSERT INTO ttt VALUES(7, 1, 3);
          144  +  INSERT INTO ttt VALUES(8, 2, 4);
          145  +  INSERT INTO ttt VALUES(9, 3, 5);
          146  +}
          147  +
          148  +execsql_test 4.1 {
          149  +  SELECT max(c), max(b) OVER (ORDER BY b) FROM ttt GROUP BY b;
          150  +}
   130    151   
   131    152   
   132    153   finish_test
   133    154   

Changes to test/window4.test.

   206    206     FROM t5
   207    207   } {1 two   2 three   3 four   4 five   5 {}}
   208    208   
   209    209   do_execsql_test 3.6.3 {
   210    210     SELECT a, max(c) OVER (ORDER BY a ROWS BETWEEN 0 FOLLOWING AND 0 FOLLOWING)
   211    211     FROM t5
   212    212   } {1 one   2 two   3 three   4 four   5 five}
          213  +
          214  +do_execsql_test 4.0 {
          215  +  DROP TABLE IF EXISTS ttt;
          216  +  CREATE TABLE ttt(a INTEGER PRIMARY KEY, b INTEGER, c INTEGER);
          217  +  INSERT INTO ttt VALUES(1, 1, 1);
          218  +  INSERT INTO ttt VALUES(2, 2, 2);
          219  +  INSERT INTO ttt VALUES(3, 3, 3);
          220  +
          221  +  INSERT INTO ttt VALUES(4, 1, 2);
          222  +  INSERT INTO ttt VALUES(5, 2, 3);
          223  +  INSERT INTO ttt VALUES(6, 3, 4);
          224  +
          225  +  INSERT INTO ttt VALUES(7, 1, 3);
          226  +  INSERT INTO ttt VALUES(8, 2, 4);
          227  +  INSERT INTO ttt VALUES(9, 3, 5);
          228  +} {}
          229  +
          230  +do_execsql_test 4.1 {
          231  +  SELECT max(c), max(b) OVER (ORDER BY b) FROM ttt GROUP BY b;
          232  +} {3 1   4 2   5 3}
   213    233   
   214    234   finish_test