/ Check-in [022079cb]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Instead of using a lemon %fallback directive, have the tokenizer try to figure out whether an instance of "WINDOW" should be TK_WINDOW or TK_ID.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | weak-fallback
Files: files | file ages | folders
SHA3-256: 022079cb0d67be5ac0a50dd9a4d41ee55ce8df681ecd0a544170d75fc8649978
User & Date: dan 2018-06-29 17:44:52
Context
2018-06-29
19:54
Improve on the previous checkin. Still a bit slow. check-in: c1fb41aa user: dan tags: weak-fallback
17:44
Instead of using a lemon %fallback directive, have the tokenizer try to figure out whether an instance of "WINDOW" should be TK_WINDOW or TK_ID. check-in: 022079cb user: dan tags: weak-fallback
2018-06-28
20:05
Modifications to parse.y to better support backwards compatibility for the "window" keyword. check-in: 7c4b879b user: dan tags: weak-fallback
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/parse.y.

   213    213     IGNORE IMMEDIATE INITIALLY INSTEAD LIKE_KW MATCH NO PLAN
   214    214     QUERY KEY OF OFFSET PRAGMA RAISE RECURSIVE RELEASE REPLACE RESTRICT ROW ROWS
   215    215     ROLLBACK SAVEPOINT TEMP TRIGGER VACUUM VIEW VIRTUAL WITH WITHOUT
   216    216   %ifdef SQLITE_OMIT_COMPOUND_SELECT
   217    217     EXCEPT INTERSECT UNION
   218    218   %endif SQLITE_OMIT_COMPOUND_SELECT
   219    219   %ifndef SQLITE_OMIT_WINDOWFUNC
   220         -  CURRENT FILTER FOLLOWING OVER PARTITION 
   221         -  PRECEDING RANGE UNBOUNDED 
          220  +  CURRENT FILTER FOLLOWING OVER PARTITION
          221  +  PRECEDING RANGE UNBOUNDED
   222    222   %endif SQLITE_OMIT_WINDOWFUNC
   223    223     REINDEX RENAME CTIME_KW IF
   224    224     .
   225    225   %wildcard ANY.
   226    226   
   227    227   // Define operator precedence early so that this is the first occurrence
   228    228   // of the operator tokens in the grammar.  Keeping the operators together
................................................................................
   253    253   // keywords.  Any non-standard keyword can also be an identifier.
   254    254   //
   255    255   %token_class id  ID|INDEXED.
   256    256   
   257    257   
   258    258   // And "ids" is an identifier-or-string.
   259    259   //
   260         -%token_class ids ID|STRING.
          260  +%token_class ids  ID|STRING.
   261    261   
   262    262   // The name of a column or table can be any of the following:
   263    263   //
   264    264   %type nm {Token}
   265    265   nm(A) ::= id(A).
   266    266   nm(A) ::= STRING(A).
   267    267   nm(A) ::= JOIN_KW(A).
   268         -nm(A) ::= WINDOW(A).
   269    268   
   270    269   // A typetoken is really zero or more tokens that form a type name such
   271    270   // as can be found after the column name in a CREATE TABLE statement.
   272    271   // Multiple tokens are concatenated to form the value of the typetoken.
   273    272   //
   274    273   %type typetoken {Token}
   275    274   typetoken(A) ::= .   {A.n = 0; A.z = 0;}
................................................................................
   277    276   typetoken(A) ::= typename(A) LP signed RP(Y). {
   278    277     A.n = (int)(&Y.z[Y.n] - A.z);
   279    278   }
   280    279   typetoken(A) ::= typename(A) LP signed COMMA signed RP(Y). {
   281    280     A.n = (int)(&Y.z[Y.n] - A.z);
   282    281   }
   283    282   %type typename {Token}
   284         -typename(A) ::= nm(A).
   285         -typename(A) ::= typename(A) nm(Y). {A.n=Y.n+(int)(Y.z-A.z);}
          283  +typename(A) ::= ids(A).
          284  +typename(A) ::= typename(A) ids(Y). {A.n=Y.n+(int)(Y.z-A.z);}
   286    285   signed ::= plus_num.
   287    286   signed ::= minus_num.
   288    287   
   289    288   // The scanpt non-terminal takes a value which is a pointer to the
   290    289   // input text just past the last token that has been shifted into
   291    290   // the parser.  By surrounding some phrase in the grammar with two
   292    291   // scanpt non-terminals, we can capture the input text for that phrase.
................................................................................
   339    338                                    {sqlite3AddPrimaryKey(pParse,0,R,I,Z);}
   340    339   ccons ::= UNIQUE onconf(R).      {sqlite3CreateIndex(pParse,0,0,0,0,R,0,0,0,0,
   341    340                                      SQLITE_IDXTYPE_UNIQUE);}
   342    341   ccons ::= CHECK LP expr(X) RP.   {sqlite3AddCheckConstraint(pParse,X);}
   343    342   ccons ::= REFERENCES nm(T) eidlist_opt(TA) refargs(R).
   344    343                                    {sqlite3CreateForeignKey(pParse,0,&T,TA,R);}
   345    344   ccons ::= defer_subclause(D).    {sqlite3DeferForeignKey(pParse,D);}
   346         -ccons ::= COLLATE nm(C).        {sqlite3AddCollateType(pParse, &C);}
          345  +ccons ::= COLLATE ids(C).        {sqlite3AddCollateType(pParse, &C);}
   347    346   
   348    347   // The optional AUTOINCREMENT keyword
   349    348   %type autoinc {int}
   350    349   autoinc(X) ::= .          {X = 0;}
   351    350   autoinc(X) ::= AUTOINCR.  {X = 1;}
   352    351   
   353    352   // The next group of rules parses the arguments to a REFERENCES clause
................................................................................
   985    984     Expr *temp2 = sqlite3ExprAlloc(pParse->db, TK_ID, &Y, 1);
   986    985     Expr *temp3 = sqlite3ExprAlloc(pParse->db, TK_ID, &Z, 1);
   987    986     Expr *temp4 = sqlite3PExpr(pParse, TK_DOT, temp2, temp3);
   988    987     A = sqlite3PExpr(pParse, TK_DOT, temp1, temp4);
   989    988   }
   990    989   term(A) ::= NULL|FLOAT|BLOB(X). {A=tokenExpr(pParse,@X,X); /*A-overwrites-X*/}
   991    990   term(A) ::= STRING(X).          {A=tokenExpr(pParse,@X,X); /*A-overwrites-X*/}
   992         -term(A) ::= WINDOW(X).          {A=tokenExpr(pParse,TK_ID,X);/*A-overwrites-X*/}
   993    991   term(A) ::= INTEGER(X). {
   994    992     A = sqlite3ExprAlloc(pParse->db, TK_INTEGER, &X, 1);
   995    993   }
   996    994   expr(A) ::= VARIABLE(X).     {
   997    995     if( !(X.z[0]=='#' && sqlite3Isdigit(X.z[1])) ){
   998    996       u32 n = X.n;
   999    997       A = tokenExpr(pParse, TK_VARIABLE, X);
................................................................................
  1009   1007         A = 0;
  1010   1008       }else{
  1011   1009         A = sqlite3PExpr(pParse, TK_REGISTER, 0, 0);
  1012   1010         if( A ) sqlite3GetInt32(&t.z[1], &A->iTable);
  1013   1011       }
  1014   1012     }
  1015   1013   }
  1016         -expr(A) ::= expr(A) COLLATE nm(C). {
         1014  +expr(A) ::= expr(A) COLLATE ids(C). {
  1017   1015     A = sqlite3ExprAddCollateToken(pParse, A, &C, 1);
  1018   1016   }
  1019   1017   %ifndef SQLITE_OMIT_CAST
  1020   1018   expr(A) ::= CAST LP expr(E) AS typetoken(T) RP. {
  1021   1019     A = sqlite3ExprAlloc(pParse->db, TK_CAST, &T, 1);
  1022   1020     sqlite3ExprAttachSubtrees(pParse->db, A, E, 0);
  1023   1021   }
  1024   1022   %endif  SQLITE_OMIT_CAST
  1025         -expr(A) ::= nm(X) LP distinct(D) exprlist(Y) RP 
         1023  +expr(A) ::= id(X) LP distinct(D) exprlist(Y) RP 
  1026   1024   %ifndef SQLITE_OMIT_WINDOWFUNC
  1027   1025     over_opt(Z)
  1028   1026   %endif
  1029   1027   . {
  1030   1028     if( Y && Y->nExpr>pParse->db->aLimit[SQLITE_LIMIT_FUNCTION_ARG] ){
  1031   1029       sqlite3ErrorMsg(pParse, "too many arguments on function %T", &X);
  1032   1030     }
  1033   1031     A = sqlite3ExprFunction(pParse, Y, &X);
  1034   1032     sqlite3WindowAttach(pParse, A, Z);
  1035   1033     if( D==SF_Distinct && A ){
  1036   1034       A->flags |= EP_Distinct;
  1037   1035     }
  1038   1036   }
  1039         -expr(A) ::= nm(X) LP STAR RP
         1037  +expr(A) ::= id(X) LP STAR RP
  1040   1038   %ifndef SQLITE_OMIT_WINDOWFUNC
  1041   1039     over_opt(Z)
  1042   1040   %endif
  1043   1041   . {
  1044   1042     A = sqlite3ExprFunction(pParse, 0, &X);
  1045   1043     sqlite3WindowAttach(pParse, A, Z);
  1046   1044   }
................................................................................
  1441   1439   }
  1442   1440   eidlist(A) ::= nm(Y) collate(C) sortorder(Z). {
  1443   1441     A = parserAddExprIdListTerm(pParse, 0, &Y, C, Z); /*A-overwrites-Y*/
  1444   1442   }
  1445   1443   
  1446   1444   %type collate {int}
  1447   1445   collate(C) ::= .              {C = 0;}
  1448         -collate(C) ::= COLLATE nm.    {C = 1;}
         1446  +collate(C) ::= COLLATE ids.   {C = 1;}
  1449   1447   
  1450   1448   
  1451   1449   ///////////////////////////// The DROP INDEX command /////////////////////////
  1452   1450   //
  1453   1451   cmd ::= DROP INDEX ifexists(E) fullname(X).   {sqlite3DropIndex(pParse, X, E);}
  1454   1452   
  1455   1453   ///////////////////////////// The VACUUM command /////////////////////////////

Changes to src/sqliteInt.h.

  4295   4295   ** The interface to the LEMON-generated parser
  4296   4296   */
  4297   4297   #ifndef SQLITE_AMALGAMATION
  4298   4298     void *sqlite3ParserAlloc(void*(*)(u64), Parse*);
  4299   4299     void sqlite3ParserFree(void*, void(*)(void*));
  4300   4300   #endif
  4301   4301   void sqlite3Parser(void*, int, Token);
         4302  +int sqlite3ParserFallback(int);
  4302   4303   #ifdef YYTRACKMAXSTACKDEPTH
  4303   4304     int sqlite3ParserStackPeak(void*);
  4304   4305   #endif
  4305   4306   
  4306   4307   void sqlite3AutoLoadExtensions(sqlite3*);
  4307   4308   #ifndef SQLITE_OMIT_LOAD_EXTENSION
  4308   4309     void sqlite3CloseExtensions(sqlite3*);

Changes to src/tokenize.c.

   184    184   #endif
   185    185   
   186    186   /* Make the IdChar function accessible from ctime.c */
   187    187   #ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS
   188    188   int sqlite3IsIdChar(u8 c){ return IdChar(c); }
   189    189   #endif
   190    190   
          191  +/*
          192  +** Return the id of the next token in string (*pz). Before returning, set
          193  +** (*pz) to point to the byte following the parsed token.
          194  +**
          195  +** This function treats any token that begins with "w" or "W" as
          196  +** TK_ID, without checking whether or not it is actually a keyword.
          197  +*/
          198  +static int windowGetToken(const unsigned char **pz){
          199  +  int ret;
          200  +  const unsigned char *z = *pz;
          201  +  if( z[0]=='w' || z[0]=='W' ){
          202  +    do { z++; }while( IdChar(z[0]) );
          203  +    ret = TK_ID;
          204  +  }else{
          205  +    z += sqlite3GetToken(z, &ret);
          206  +  }
          207  +  *pz = z;
          208  +  return ret;
          209  +}
          210  +
          211  +/*
          212  +** The tokenizer has just parsed the keyword WINDOW. In this case the token
          213  +** may really be the keyword (TK_WINDOW), or may be an identifier (TK_ID).
          214  +** This function determines which it is by inspecting the next two tokens
          215  +** in the input stream. Specifically, the token is TK_WINDOW if the following
          216  +** two tokens are:
          217  +**
          218  +**   * TK_ID, or something else that can be used as a window name, and
          219  +**   * TK_AS.
          220  +**
          221  +** Instead of using sqlite3GetToken() to parse tokens directly, this function
          222  +** uses windowGetToken(). This is to avoid recursion if the input is similar
          223  +** to "window window window window".
          224  +*/
          225  +static void analyzeWindowKeyword(const unsigned char *z, int *tokenType){
          226  +  int t;
          227  +  assert( *tokenType==TK_WINDOW );
          228  +  while( (t = windowGetToken(&z))==TK_SPACE );
          229  +  if( t!=TK_ID && t!=TK_STRING 
          230  +   && t!=TK_JOIN_KW && sqlite3ParserFallback(t)!=TK_ID 
          231  +  ){
          232  +    *tokenType = TK_ID;
          233  +  }else{
          234  +    while( (t = windowGetToken(&z))==TK_SPACE );
          235  +    if( t!=TK_AS ){
          236  +      *tokenType = TK_ID;
          237  +    }
          238  +  }
          239  +}
   191    240   
   192    241   /*
   193    242   ** Return the length (in bytes) of the token that begins at z[0]. 
   194    243   ** Store the token type in *tokenType before returning.
   195    244   */
   196    245   int sqlite3GetToken(const unsigned char *z, int *tokenType){
   197    246     int i, c;
................................................................................
   429    478           /* This token started out using characters that can appear in keywords,
   430    479           ** but z[i] is a character not allowed within keywords, so this must
   431    480           ** be an identifier instead */
   432    481           i++;
   433    482           break;
   434    483         }
   435    484         *tokenType = TK_ID;
   436         -      return keywordCode((char*)z, i, tokenType);
          485  +      keywordCode((char*)z, i, tokenType);
          486  +      if( *tokenType==TK_WINDOW ){
          487  +        assert( i==6 );
          488  +        analyzeWindowKeyword(&z[6], tokenType);
          489  +      }
          490  +      return i;
   437    491       }
   438    492       case CC_X: {
   439    493   #ifndef SQLITE_OMIT_BLOB_LITERAL
   440    494         testcase( z[0]=='x' ); testcase( z[0]=='X' );
   441    495         if( z[1]=='\'' ){
   442    496           *tokenType = TK_BLOB;
   443    497           for(i=2; sqlite3Isxdigit(z[i]); i++){}

Added test/window6.test.

            1  +# 2018 May 8
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +# This file implements regression tests for SQLite library. Specifically,
           12  +# it tests that window-function keywords (e.g. WINDOW, OVER) may be used as identifiers.
           13  +#
           14  +
           15  +set testdir [file dirname $argv0]
           16  +source $testdir/tester.tcl
           17  +set testprefix window6
           18  +
           19  +ifcapable !windowfunc {
           20  +  finish_test
           21  +  return
           22  +}
           23  +
           24  +set setup {
           25  +  CREATE TABLE %t1(%a, %b %typename);
           26  +  INSERT INTO %t1 VALUES(1, 'a');
           27  +  INSERT INTO %t1 VALUES(2, 'b');
           28  +  INSERT INTO %t1 VALUES(3, 'c');
           29  +  INSERT INTO %t1 VALUES(4, 'd');
           30  +  INSERT INTO %t1 VALUES(5, 'e');
           31  +}
           32  +
           33  +foreach {tn vars} {
           34  +  1 {}
           35  +  2 { set A(%t1) over }
           36  +  3 { set A(%a)  over }
           37  +  4 { 
           38  +    set A(%alias)   over 
           39  +    set A(%a)       following 
           40  +    set A(%b)       over 
           41  +  }
           42  +  5 { 
           43  +    set A(%t1)      over 
           44  +    set A(%a)       following 
           45  +    set A(%b)       preceding 
           46  +    set A(%w)       current 
           47  +    set A(%alias)   filter 
           48  +    set A(%typename)  window 
           49  +  }
           50  +
           51  +  6 { 
           52  +    set A(%a)       window 
           53  +  }
           54  +} {
           55  +  set A(%t1)    t1
           56  +  set A(%a)     a
           57  +  set A(%b)     b
           58  +  set A(%w)     w
           59  +  set A(%alias) alias
           60  +  set A(%typename) integer
           61  +  eval $vars
           62  +
           63  +  set MAP [array get A]
           64  +  set setup_sql [string map $MAP $setup]
           65  +  reset_db
           66  +  execsql $setup_sql
           67  +
           68  +  do_execsql_test 1.$tn.1 [string map $MAP {
           69  +    SELECT group_concat(%a, '.') OVER (ORDER BY %b) FROM %t1
           70  +  }] {1 1.2 1.2.3 1.2.3.4 1.2.3.4.5}
           71  +
           72  +  do_execsql_test 1.$tn.2 [string map $MAP {
           73  +    SELECT sum(%a) OVER %w FROM %t1 WINDOW %w AS (ORDER BY %b)
           74  +  }] {1 3 6 10 15}
           75  +
           76  +  do_execsql_test 1.$tn.3 [string map $MAP {
           77  +    SELECT sum(%alias.%a) OVER %w FROM %t1 %alias WINDOW %w AS (ORDER BY %b)
           78  +  }] {1 3 6 10 15}
           79  +
           80  +  do_execsql_test 1.$tn.4 [string map $MAP {
           81  +    SELECT sum(%a) %alias FROM %t1
           82  +  }] {15}
           83  +}
           84  +
           85  +
           86  +proc winproc {args} { return "window: $args" }
           87  +db func window winproc
           88  +do_execsql_test 2.0 {
           89  +  SELECT window('hello world');
           90  +} {{window: {hello world}}}
           91  +
           92  +proc wincmp {a b} { string compare $b $a }
           93  +db collate window wincmp
           94  +do_execsql_test 3.0 {
           95  +  CREATE TABLE window(x COLLATE window);
           96  +  INSERT INTO window VALUES('bob'), ('alice'), ('cate');
           97  +  SELECT * FROM window ORDER BY x COLLATE window;
           98  +} {cate bob alice}
           99  +do_execsql_test 3.1 {
          100  +  DROP TABLE window;
          101  +  CREATE TABLE x1(x);
          102  +  INSERT INTO x1 VALUES('bob'), ('alice'), ('cate');
          103  +  CREATE INDEX window ON x1(x COLLATE window);
          104  +  SELECT * FROM x1 ORDER BY x COLLATE window;
          105  +} {cate bob alice}
          106  +
          107  +
          108  +do_execsql_test 4.0 { CREATE TABLE t4(x, y); }
          109  +
          110  +# do_execsql_test 4.1 { PRAGMA parser_trace = 1 }
          111  +do_execsql_test 4.1 { 
          112  +  SELECT * FROM t4 window, t4;
          113  +}
          114  +
          115  +
          116  +finish_test
          117  +

Changes to tool/lempar.c.

  1064   1064         cDiv = ' ';
  1065   1065       }
  1066   1066       fprintf(yyTraceFILE,"]\n");
  1067   1067     }
  1068   1068   #endif
  1069   1069     return;
  1070   1070   }
         1071  +
         1072  +/*
         1073  +** Return the fallback token corresponding to canonical token iToken, or
         1074  +** 0 if iToken has no fallback.
         1075  +*/
         1076  +int ParseFallback(int iToken){
         1077  +#ifdef YYFALLBACK
         1078  +  if( iToken<sizeof(yyFallback)/sizeof(yyFallback[0]) ){
         1079  +    return yyFallback[iToken];
         1080  +  }
         1081  +#endif
         1082  +  return 0;
         1083  +}
         1084  +