/ Check-in [c78b357c]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Bare identifiers in ORDER BY clauses bind more tightly to output column name, but identifiers in expressions bind more tightly to input column names. This is a compromise between SQL92 and SQL99 behavior and is what PostgreSQL and MS-SQL do. Ticket [f617ea3125e9c].
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: c78b357c00a35ed48ce2ffbc041de8d22570d1e2
User & Date: drh 2013-08-15 20:24:27
Context
2013-08-15
22:40
Make sure that GROUP BY terms select input column names in preference to output column names, in compliance with the SQL standard. Ticket [1c69be2dafc28]. check-in: f2d175f9 user: drh tags: trunk
20:24
Bare identifiers in ORDER BY clauses bind more tightly to output column name, but identifiers in expressions bind more tightly to input column names. This is a compromise between SQL92 and SQL99 behavior and is what PostgreSQL and MS-SQL do. Ticket [f617ea3125e9c]. check-in: c78b357c user: drh tags: trunk
20:05
Make it easy to attach a debugger the test fixture process prior to any tests being run. check-in: 53cd9ebf user: mistachkin tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/resolve.c.

    51     51   ** TK_AS operator.  The TK_AS operator causes the expression to be
    52     52   ** evaluated just once and then reused for each alias.
    53     53   **
    54     54   ** The reason for suppressing the TK_AS term when the expression is a simple
    55     55   ** column reference is so that the column reference will be recognized as
    56     56   ** usable by indices within the WHERE clause processing logic. 
    57     57   **
    58         -** Hack:  The TK_AS operator is inhibited if zType[0]=='G'.  This means
           58  +** The TK_AS operator is inhibited if zType[0]=='G'.  This means
    59     59   ** that in a GROUP BY clause, the expression is evaluated twice.  Hence:
    60     60   **
    61     61   **     SELECT random()%5 AS x, count(*) FROM tab GROUP BY x
    62     62   **
    63     63   ** Is equivalent to:
    64     64   **
    65     65   **     SELECT random()%5 AS x, count(*) FROM tab GROUP BY random()%5
    66     66   **
    67     67   ** The result of random()%5 in the GROUP BY clause is probably different
    68         -** from the result in the result-set.  We might fix this someday.  Or
    69         -** then again, we might not...
           68  +** from the result in the result-set.  On the other hand Standard SQL does
           69  +** not allow the GROUP BY clause to contain references to result-set columns.
           70  +** So this should never come up in well-formed queries.
    70     71   **
    71     72   ** If the reference is followed by a COLLATE operator, then make sure
    72     73   ** the COLLATE operator is preserved.  For example:
    73     74   **
    74     75   **     SELECT a+b, c+d FROM t1 ORDER BY 1 COLLATE nocase;
    75     76   **
    76     77   ** Should be transformed into:
................................................................................
   392    393       **
   393    394       **     SELECT a+b AS x FROM table WHERE x<10;
   394    395       **
   395    396       ** In cases like this, replace pExpr with a copy of the expression that
   396    397       ** forms the result set entry ("a+b" in the example) and return immediately.
   397    398       ** Note that the expression in the result set should have already been
   398    399       ** resolved by the time the WHERE clause is resolved.
          400  +    **
          401  +    ** The ability to use an output result-set column in the WHERE, GROUP BY,
          402  +    ** or HAVING clauses, or as part of a larger expression in the ORDRE BY
          403  +    ** clause is not standard SQL.  This is a (goofy) SQLite extension, that
          404  +    ** is supported for backwards compatibility only.  TO DO: Issue a warning
          405  +    ** on sqlite3_log() whenever the capability is used.
   399    406       */
   400    407       if( (pEList = pNC->pEList)!=0
   401    408        && zTab==0
   402         -     && ((pNC->ncFlags & NC_AsMaybe)==0 || cnt==0)
          409  +     && cnt==0
   403    410       ){
   404    411         for(j=0; j<pEList->nExpr; j++){
   405    412           char *zAs = pEList->a[j].zName;
   406    413           if( zAs!=0 && sqlite3StrICmp(zAs, zCol)==0 ){
   407    414             Expr *pOrig;
   408    415             assert( pExpr->pLeft==0 && pExpr->pRight==0 );
   409    416             assert( pExpr->x.pList==0 );
................................................................................
   957    964     }
   958    965     return 0;
   959    966   }
   960    967   
   961    968   /*
   962    969   ** Check every term in the ORDER BY or GROUP BY clause pOrderBy of
   963    970   ** the SELECT statement pSelect.  If any term is reference to a
   964         -** result set expression (as determined by the ExprList.a.iCol field)
          971  +** result set expression (as determined by the ExprList.a.iOrderByCol field)
   965    972   ** then convert that term into a copy of the corresponding result set
   966    973   ** column.
   967    974   **
   968    975   ** If any errors are detected, add an error message to pParse and
   969    976   ** return non-zero.  Return zero if no errors are seen.
   970    977   */
   971    978   int sqlite3ResolveOrderGroupBy(
................................................................................
  1031   1038     int nResult;                   /* Number of terms in the result set */
  1032   1039   
  1033   1040     if( pOrderBy==0 ) return 0;
  1034   1041     nResult = pSelect->pEList->nExpr;
  1035   1042     pParse = pNC->pParse;
  1036   1043     for(i=0, pItem=pOrderBy->a; i<pOrderBy->nExpr; i++, pItem++){
  1037   1044       Expr *pE = pItem->pExpr;
  1038         -    iCol = resolveAsName(pParse, pSelect->pEList, pE);
         1045  +    Expr *pE2 = sqlite3ExprSkipCollate(pE);
         1046  +    iCol = resolveAsName(pParse, pSelect->pEList, pE2);
  1039   1047       if( iCol>0 ){
  1040   1048         /* If an AS-name match is found, mark this ORDER BY column as being
  1041   1049         ** a copy of the iCol-th result-set column.  The subsequent call to
  1042   1050         ** sqlite3ResolveOrderGroupBy() will convert the expression to a
  1043   1051         ** copy of the iCol-th result-set expression. */
  1044   1052         pItem->iOrderByCol = (u16)iCol;
  1045   1053         continue;
  1046   1054       }
  1047         -    if( sqlite3ExprIsInteger(sqlite3ExprSkipCollate(pE), &iCol) ){
         1055  +    if( sqlite3ExprIsInteger(pE2, &iCol) ){
  1048   1056         /* The ORDER BY term is an integer constant.  Again, set the column
  1049   1057         ** number so that sqlite3ResolveOrderGroupBy() will convert the
  1050   1058         ** order-by term to a copy of the result-set expression */
  1051   1059         if( iCol<1 || iCol>0xffff ){
  1052   1060           resolveOutOfRangeError(pParse, zType, i+1, nResult);
  1053   1061           return 1;
  1054   1062         }
................................................................................
  1192   1200       ** expressions in the WHERE clause (etc.) can refer to expressions by
  1193   1201       ** aliases in the result set.
  1194   1202       **
  1195   1203       ** Minor point: If this is the case, then the expression will be
  1196   1204       ** re-evaluated for each reference to it.
  1197   1205       */
  1198   1206       sNC.pEList = p->pEList;
  1199         -    sNC.ncFlags |= NC_AsMaybe;
  1200   1207       if( sqlite3ResolveExprNames(&sNC, p->pHaving) ) return WRC_Abort;
  1201   1208       if( sqlite3ResolveExprNames(&sNC, p->pWhere) ) return WRC_Abort;
  1202         -    sNC.ncFlags &= ~NC_AsMaybe;
  1203   1209   
  1204   1210       /* The ORDER BY and GROUP BY clauses may not refer to terms in
  1205   1211       ** outer queries 
  1206   1212       */
  1207   1213       sNC.pNext = 0;
  1208   1214       sNC.ncFlags |= NC_AllowAgg;
  1209   1215   

Changes to src/sqliteInt.h.

  2017   2017   /*
  2018   2018   ** Allowed values for the NameContext, ncFlags field.
  2019   2019   */
  2020   2020   #define NC_AllowAgg  0x01    /* Aggregate functions are allowed here */
  2021   2021   #define NC_HasAgg    0x02    /* One or more aggregate functions seen */
  2022   2022   #define NC_IsCheck   0x04    /* True if resolving names in a CHECK constraint */
  2023   2023   #define NC_InAggFunc 0x08    /* True if analyzing arguments to an agg func */
  2024         -#define NC_AsMaybe   0x10    /* Resolve to AS terms of the result set only
  2025         -                             ** if no other resolution is available */
  2026         -#define NC_PartIdx   0x20    /* True if resolving a partial index WHERE */
         2024  +#define NC_PartIdx   0x10    /* True if resolving a partial index WHERE */
  2027   2025   
  2028   2026   /*
  2029   2027   ** An instance of the following structure contains all information
  2030   2028   ** needed to generate code for a single SELECT statement.
  2031   2029   **
  2032   2030   ** nLimit is set to -1 if there is no LIMIT clause.  nOffset is set to 0.
  2033   2031   ** If there is a LIMIT clause, the parser sets nLimit to the value of the

Changes to test/resolver01.test.

     9      9   #
    10     10   #***********************************************************************
    11     11   #
    12     12   # This file tests features of the name resolver (the component that
    13     13   # figures out what identifiers in the SQL statement refer to) that
    14     14   # were fixed by ticket [2500cdb9be]
    15     15   #
           16  +# See also tickets [1c69be2daf] and [f617ea3125] from 2013-08-14.
           17  +#
    16     18   
    17     19   set testdir [file dirname $argv0]
    18     20   source $testdir/tester.tcl
    19     21   
           22  +# "ORDER BY y" binds to the output result-set column named "y"
           23  +# if available.  If no output column is named "y", then try to
           24  +# bind against an input column named "y".
           25  +#
           26  +# This is classical SQL92 behavior.
           27  +#
    20     28   do_test resolver01-1.1 {
    21     29     catchsql {
    22     30       CREATE TABLE t1(x, y); INSERT INTO t1 VALUES(11,22);
    23     31       CREATE TABLE t2(y, z); INSERT INTO t2 VALUES(33,44);
    24     32       SELECT 1 AS y FROM t1, t2 ORDER BY y;
    25     33     }
    26     34   } {0 1}
    27     35   do_test resolver01-1.2 {
           36  +  catchsql {
           37  +    SELECT 1 AS yy FROM t1, t2 ORDER BY y;
           38  +  }
           39  +} {1 {ambiguous column name: y}}
           40  +do_test resolver01-1.3 {
           41  +  catchsql {
           42  +    CREATE TABLE t3(x,y); INSERT INTO t3 VALUES(11,44),(33,22);
           43  +    SELECT x AS y FROM t3 ORDER BY y;
           44  +  }
           45  +} {0 {11 33}}
           46  +do_test resolver01-1.4 {
           47  +  catchsql {
           48  +    SELECT x AS yy FROM t3 ORDER BY y;
           49  +  }
           50  +} {0 {33 11}}
           51  +
           52  +# SQLite allows the WHERE clause to reference output columns if there is
           53  +# no other way to resolve the name.
           54  +#
           55  +do_test resolver01-1.5 {
           56  +  catchsql {
           57  +    SELECT x AS yy FROM t3 ORDER BY yy;
           58  +  }
           59  +} {0 {11 33}}
           60  +do_test resolver01-1.6 {
           61  +  catchsql {
           62  +    SELECT x AS yy FROM t3 ORDER BY 1;
           63  +  }
           64  +} {0 {11 33}}
           65  +
           66  +# The "ORDER BY y COLLATE nocase" form works the same as "ORDER BY y".
           67  +# The "y" binds more tightly to output columns than to input columns.
           68  +#
           69  +# This is for compatibility with SQL92 and with historical SQLite behavior.
           70  +# Note that PostgreSQL considers "y COLLATE nocase" to be an expression
           71  +# and thus PostgreSQL treats this case as if it where the 3.x case below.
           72  +#
           73  +do_test resolver01-2.1 {
    28     74     catchsql {
    29     75       SELECT 2 AS y FROM t1, t2 ORDER BY y COLLATE nocase;
    30     76     }
    31     77   } {0 2}
    32         -do_test resolver01-1.3 {
           78  +do_test resolver01-2.2 {
           79  +  catchsql {
           80  +    SELECT 2 AS yy FROM t1, t2 ORDER BY y COLLATE nocase;
           81  +  }
           82  +} {1 {ambiguous column name: y}}
           83  +do_test resolver01-2.3 {
           84  +  catchsql {
           85  +    SELECT x AS y FROM t3 ORDER BY y COLLATE nocase;
           86  +  }
           87  +} {0 {11 33}}
           88  +do_test resolver01-2.4 {
           89  +  catchsql {
           90  +    SELECT x AS yy FROM t3 ORDER BY y COLLATE nocase;
           91  +  }
           92  +} {0 {33 11}}
           93  +do_test resolver01-2.5 {
           94  +  catchsql {
           95  +    SELECT x AS yy FROM t3 ORDER BY yy COLLATE nocase;
           96  +  }
           97  +} {0 {11 33}}
           98  +do_test resolver01-2.6 {
           99  +  catchsql {
          100  +    SELECT x AS yy FROM t3 ORDER BY 1 COLLATE nocase;
          101  +  }
          102  +} {0 {11 33}}
          103  +
          104  +# But if the form is "ORDER BY expr" then bind more tightly to the
          105  +# the input column names and only use the output column names if no
          106  +# input column name matches.
          107  +#
          108  +# This is SQL99 behavior, as implemented by PostgreSQL and MS-SQL.
          109  +# Note that Oracle works differently.
          110  +#
          111  +do_test resolver01-3.1 {
    33    112     catchsql {
    34    113       SELECT 3 AS y FROM t1, t2 ORDER BY +y;
    35    114     }
    36         -} {0 3}
          115  +} {1 {ambiguous column name: y}}
          116  +do_test resolver01-3.2 {
          117  +  catchsql {
          118  +    SELECT 2 AS yy FROM t1, t2 ORDER BY +y;
          119  +  }
          120  +} {1 {ambiguous column name: y}}
          121  +do_test resolver01-3.3 {
          122  +  catchsql {
          123  +    SELECT x AS y FROM t3 ORDER BY +y;
          124  +  }
          125  +} {0 {33 11}}
          126  +do_test resolver01-3.4 {
          127  +  catchsql {
          128  +    SELECT x AS yy FROM t3 ORDER BY +y;
          129  +  }
          130  +} {0 {33 11}}
          131  +do_test resolver01-3.5 {
          132  +  catchsql {
          133  +    SELECT x AS yy FROM t3 ORDER BY +yy
          134  +  }
          135  +} {0 {11 33}}
    37    136   
          137  +# This is the test case given in ticket [f617ea3125e9] (with table name
          138  +# changed from "t1" to "t4".  The behavior of (1) and (3) match with
          139  +# PostgreSQL, but we intentionally break with PostgreSQL to provide
          140  +# SQL92 behavior for case (2).
          141  +#
          142  +do_execsql_test resolver01-4.1 {
          143  +  CREATE TABLE t4(m CHAR(2));
          144  +  INSERT INTO t4 VALUES('az');
          145  +  INSERT INTO t4 VALUES('by');
          146  +  INSERT INTO t4 VALUES('cx');
          147  +  SELECT '1', substr(m,2) AS m FROM t4 ORDER BY m;
          148  +  SELECT '2', substr(m,2) AS m FROM t4 ORDER BY m COLLATE binary;
          149  +  SELECT '3', substr(m,2) AS m FROM t4 ORDER BY lower(m);
          150  +} {1 x 1 y 1 z 2 x 2 y 2 z 3 z 3 y 3 x}
    38    151   
    39    152   finish_test

Changes to test/tkt2822.test.

   204    204     execsql {
   205    205       SELECT a AS "b" FROM t3 ORDER BY [B];
   206    206     }
   207    207   } {1 9}
   208    208   
   209    209   # In "ORDER BY +b" the term is now an expression rather than
   210    210   # a label.  It therefore matches by rule (3) instead of rule (2).
   211         -#
   212         -# 2013-04-13:  This is busted.  Changed to conform to PostgreSQL and
   213         -# MySQL and Oracle behavior.
   214    211   # 
   215    212   do_test tkt2822-5.5 {
   216    213     execsql {
   217    214       SELECT a AS b FROM t3 ORDER BY +b;
   218    215     }
   219         -} {1 9}
          216  +} {9 1}
   220    217   
   221    218   # Tests for rule 2 in compound queries
   222    219   #
   223    220   do_test tkt2822-6.1 {
   224    221     execsql {
   225    222       CREATE TABLE t6a(p,q);
   226    223       INSERT INTO t6a VALUES(1,8);