/ Check-in [c78b357c]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Bare identifiers in ORDER BY clauses bind more tightly to output column name, but identifiers in expressions bind more tightly to input column names. This is a compromise between SQL92 and SQL99 behavior and is what PostgreSQL and MS-SQL do. Ticket [f617ea3125e9c].
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: c78b357c00a35ed48ce2ffbc041de8d22570d1e2
User & Date: drh 2013-08-15 20:24:27
Context
2013-08-15
22:40
Make sure that GROUP BY terms select input column names in preference to output column names, in compliance with the SQL standard. Ticket [1c69be2dafc28]. check-in: f2d175f9 user: drh tags: trunk
20:24
Bare identifiers in ORDER BY clauses bind more tightly to output column name, but identifiers in expressions bind more tightly to input column names. This is a compromise between SQL92 and SQL99 behavior and is what PostgreSQL and MS-SQL do. Ticket [f617ea3125e9c]. check-in: c78b357c user: drh tags: trunk
20:05
Make it easy to attach a debugger the test fixture process prior to any tests being run. check-in: 53cd9ebf user: mistachkin tags: trunk
Changes
Hide Diffs Unified Diffs Show Whitespace Changes Patch

Changes to src/resolve.c.

51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69

70
71
72
73
74
75
76
...
392
393
394
395
396
397
398






399
400
401
402
403
404
405
406
407
408
409
...
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
....
1031
1032
1033
1034
1035
1036
1037

1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
....
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
** TK_AS operator.  The TK_AS operator causes the expression to be
** evaluated just once and then reused for each alias.
**
** The reason for suppressing the TK_AS term when the expression is a simple
** column reference is so that the column reference will be recognized as
** usable by indices within the WHERE clause processing logic. 
**
** Hack:  The TK_AS operator is inhibited if zType[0]=='G'.  This means
** that in a GROUP BY clause, the expression is evaluated twice.  Hence:
**
**     SELECT random()%5 AS x, count(*) FROM tab GROUP BY x
**
** Is equivalent to:
**
**     SELECT random()%5 AS x, count(*) FROM tab GROUP BY random()%5
**
** The result of random()%5 in the GROUP BY clause is probably different
** from the result in the result-set.  We might fix this someday.  Or
** then again, we might not...

**
** If the reference is followed by a COLLATE operator, then make sure
** the COLLATE operator is preserved.  For example:
**
**     SELECT a+b, c+d FROM t1 ORDER BY 1 COLLATE nocase;
**
** Should be transformed into:
................................................................................
    **
    **     SELECT a+b AS x FROM table WHERE x<10;
    **
    ** In cases like this, replace pExpr with a copy of the expression that
    ** forms the result set entry ("a+b" in the example) and return immediately.
    ** Note that the expression in the result set should have already been
    ** resolved by the time the WHERE clause is resolved.






    */
    if( (pEList = pNC->pEList)!=0
     && zTab==0
     && ((pNC->ncFlags & NC_AsMaybe)==0 || cnt==0)
    ){
      for(j=0; j<pEList->nExpr; j++){
        char *zAs = pEList->a[j].zName;
        if( zAs!=0 && sqlite3StrICmp(zAs, zCol)==0 ){
          Expr *pOrig;
          assert( pExpr->pLeft==0 && pExpr->pRight==0 );
          assert( pExpr->x.pList==0 );
................................................................................
  }
  return 0;
}

/*
** Check every term in the ORDER BY or GROUP BY clause pOrderBy of
** the SELECT statement pSelect.  If any term is reference to a
** result set expression (as determined by the ExprList.a.iCol field)
** then convert that term into a copy of the corresponding result set
** column.
**
** If any errors are detected, add an error message to pParse and
** return non-zero.  Return zero if no errors are seen.
*/
int sqlite3ResolveOrderGroupBy(
................................................................................
  int nResult;                   /* Number of terms in the result set */

  if( pOrderBy==0 ) return 0;
  nResult = pSelect->pEList->nExpr;
  pParse = pNC->pParse;
  for(i=0, pItem=pOrderBy->a; i<pOrderBy->nExpr; i++, pItem++){
    Expr *pE = pItem->pExpr;

    iCol = resolveAsName(pParse, pSelect->pEList, pE);
    if( iCol>0 ){
      /* If an AS-name match is found, mark this ORDER BY column as being
      ** a copy of the iCol-th result-set column.  The subsequent call to
      ** sqlite3ResolveOrderGroupBy() will convert the expression to a
      ** copy of the iCol-th result-set expression. */
      pItem->iOrderByCol = (u16)iCol;
      continue;
    }
    if( sqlite3ExprIsInteger(sqlite3ExprSkipCollate(pE), &iCol) ){
      /* The ORDER BY term is an integer constant.  Again, set the column
      ** number so that sqlite3ResolveOrderGroupBy() will convert the
      ** order-by term to a copy of the result-set expression */
      if( iCol<1 || iCol>0xffff ){
        resolveOutOfRangeError(pParse, zType, i+1, nResult);
        return 1;
      }
................................................................................
    ** expressions in the WHERE clause (etc.) can refer to expressions by
    ** aliases in the result set.
    **
    ** Minor point: If this is the case, then the expression will be
    ** re-evaluated for each reference to it.
    */
    sNC.pEList = p->pEList;
    sNC.ncFlags |= NC_AsMaybe;
    if( sqlite3ResolveExprNames(&sNC, p->pHaving) ) return WRC_Abort;
    if( sqlite3ResolveExprNames(&sNC, p->pWhere) ) return WRC_Abort;
    sNC.ncFlags &= ~NC_AsMaybe;

    /* The ORDER BY and GROUP BY clauses may not refer to terms in
    ** outer queries 
    */
    sNC.pNext = 0;
    sNC.ncFlags |= NC_AllowAgg;








|









|
|
>







 







>
>
>
>
>
>



|







 







|







 







>
|








|







 







<


<







51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
...
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
...
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
....
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
....
1200
1201
1202
1203
1204
1205
1206

1207
1208

1209
1210
1211
1212
1213
1214
1215
** TK_AS operator.  The TK_AS operator causes the expression to be
** evaluated just once and then reused for each alias.
**
** The reason for suppressing the TK_AS term when the expression is a simple
** column reference is so that the column reference will be recognized as
** usable by indices within the WHERE clause processing logic. 
**
** The TK_AS operator is inhibited if zType[0]=='G'.  This means
** that in a GROUP BY clause, the expression is evaluated twice.  Hence:
**
**     SELECT random()%5 AS x, count(*) FROM tab GROUP BY x
**
** Is equivalent to:
**
**     SELECT random()%5 AS x, count(*) FROM tab GROUP BY random()%5
**
** The result of random()%5 in the GROUP BY clause is probably different
** from the result in the result-set.  On the other hand Standard SQL does
** not allow the GROUP BY clause to contain references to result-set columns.
** So this should never come up in well-formed queries.
**
** If the reference is followed by a COLLATE operator, then make sure
** the COLLATE operator is preserved.  For example:
**
**     SELECT a+b, c+d FROM t1 ORDER BY 1 COLLATE nocase;
**
** Should be transformed into:
................................................................................
    **
    **     SELECT a+b AS x FROM table WHERE x<10;
    **
    ** In cases like this, replace pExpr with a copy of the expression that
    ** forms the result set entry ("a+b" in the example) and return immediately.
    ** Note that the expression in the result set should have already been
    ** resolved by the time the WHERE clause is resolved.
    **
    ** The ability to use an output result-set column in the WHERE, GROUP BY,
    ** or HAVING clauses, or as part of a larger expression in the ORDRE BY
    ** clause is not standard SQL.  This is a (goofy) SQLite extension, that
    ** is supported for backwards compatibility only.  TO DO: Issue a warning
    ** on sqlite3_log() whenever the capability is used.
    */
    if( (pEList = pNC->pEList)!=0
     && zTab==0
     && cnt==0
    ){
      for(j=0; j<pEList->nExpr; j++){
        char *zAs = pEList->a[j].zName;
        if( zAs!=0 && sqlite3StrICmp(zAs, zCol)==0 ){
          Expr *pOrig;
          assert( pExpr->pLeft==0 && pExpr->pRight==0 );
          assert( pExpr->x.pList==0 );
................................................................................
  }
  return 0;
}

/*
** Check every term in the ORDER BY or GROUP BY clause pOrderBy of
** the SELECT statement pSelect.  If any term is reference to a
** result set expression (as determined by the ExprList.a.iOrderByCol field)
** then convert that term into a copy of the corresponding result set
** column.
**
** If any errors are detected, add an error message to pParse and
** return non-zero.  Return zero if no errors are seen.
*/
int sqlite3ResolveOrderGroupBy(
................................................................................
  int nResult;                   /* Number of terms in the result set */

  if( pOrderBy==0 ) return 0;
  nResult = pSelect->pEList->nExpr;
  pParse = pNC->pParse;
  for(i=0, pItem=pOrderBy->a; i<pOrderBy->nExpr; i++, pItem++){
    Expr *pE = pItem->pExpr;
    Expr *pE2 = sqlite3ExprSkipCollate(pE);
    iCol = resolveAsName(pParse, pSelect->pEList, pE2);
    if( iCol>0 ){
      /* If an AS-name match is found, mark this ORDER BY column as being
      ** a copy of the iCol-th result-set column.  The subsequent call to
      ** sqlite3ResolveOrderGroupBy() will convert the expression to a
      ** copy of the iCol-th result-set expression. */
      pItem->iOrderByCol = (u16)iCol;
      continue;
    }
    if( sqlite3ExprIsInteger(pE2, &iCol) ){
      /* The ORDER BY term is an integer constant.  Again, set the column
      ** number so that sqlite3ResolveOrderGroupBy() will convert the
      ** order-by term to a copy of the result-set expression */
      if( iCol<1 || iCol>0xffff ){
        resolveOutOfRangeError(pParse, zType, i+1, nResult);
        return 1;
      }
................................................................................
    ** expressions in the WHERE clause (etc.) can refer to expressions by
    ** aliases in the result set.
    **
    ** Minor point: If this is the case, then the expression will be
    ** re-evaluated for each reference to it.
    */
    sNC.pEList = p->pEList;

    if( sqlite3ResolveExprNames(&sNC, p->pHaving) ) return WRC_Abort;
    if( sqlite3ResolveExprNames(&sNC, p->pWhere) ) return WRC_Abort;


    /* The ORDER BY and GROUP BY clauses may not refer to terms in
    ** outer queries 
    */
    sNC.pNext = 0;
    sNC.ncFlags |= NC_AllowAgg;

Changes to src/sqliteInt.h.

2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
/*
** Allowed values for the NameContext, ncFlags field.
*/
#define NC_AllowAgg  0x01    /* Aggregate functions are allowed here */
#define NC_HasAgg    0x02    /* One or more aggregate functions seen */
#define NC_IsCheck   0x04    /* True if resolving names in a CHECK constraint */
#define NC_InAggFunc 0x08    /* True if analyzing arguments to an agg func */
#define NC_AsMaybe   0x10    /* Resolve to AS terms of the result set only
                             ** if no other resolution is available */
#define NC_PartIdx   0x20    /* True if resolving a partial index WHERE */

/*
** An instance of the following structure contains all information
** needed to generate code for a single SELECT statement.
**
** nLimit is set to -1 if there is no LIMIT clause.  nOffset is set to 0.
** If there is a LIMIT clause, the parser sets nLimit to the value of the







<
<
|







2017
2018
2019
2020
2021
2022
2023


2024
2025
2026
2027
2028
2029
2030
2031
/*
** Allowed values for the NameContext, ncFlags field.
*/
#define NC_AllowAgg  0x01    /* Aggregate functions are allowed here */
#define NC_HasAgg    0x02    /* One or more aggregate functions seen */
#define NC_IsCheck   0x04    /* True if resolving names in a CHECK constraint */
#define NC_InAggFunc 0x08    /* True if analyzing arguments to an agg func */


#define NC_PartIdx   0x10    /* True if resolving a partial index WHERE */

/*
** An instance of the following structure contains all information
** needed to generate code for a single SELECT statement.
**
** nLimit is set to -1 if there is no LIMIT clause.  nOffset is set to 0.
** If there is a LIMIT clause, the parser sets nLimit to the value of the

Changes to test/resolver01.test.

9
10
11
12
13
14
15


16
17
18
19






20
21
22
23
24
25
26
27
28






































29
30
31
32
33

































34
35










36



37




38

















39
#
#***********************************************************************
#
# This file tests features of the name resolver (the component that
# figures out what identifiers in the SQL statement refer to) that
# were fixed by ticket [2500cdb9be]
#



set testdir [file dirname $argv0]
source $testdir/tester.tcl







do_test resolver01-1.1 {
  catchsql {
    CREATE TABLE t1(x, y); INSERT INTO t1 VALUES(11,22);
    CREATE TABLE t2(y, z); INSERT INTO t2 VALUES(33,44);
    SELECT 1 AS y FROM t1, t2 ORDER BY y;
  }
} {0 1}
do_test resolver01-1.2 {
  catchsql {






































    SELECT 2 AS y FROM t1, t2 ORDER BY y COLLATE nocase;
  }
} {0 2}
do_test resolver01-1.3 {
  catchsql {

































    SELECT 3 AS y FROM t1, t2 ORDER BY +y;
  }










} {0 3}


























finish_test







>
>




>
>
>
>
>
>









>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


>
>
>
>
>
>
>
>
>
>
|
>
>
>
|
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#
#***********************************************************************
#
# This file tests features of the name resolver (the component that
# figures out what identifiers in the SQL statement refer to) that
# were fixed by ticket [2500cdb9be]
#
# See also tickets [1c69be2daf] and [f617ea3125] from 2013-08-14.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# "ORDER BY y" binds to the output result-set column named "y"
# if available.  If no output column is named "y", then try to
# bind against an input column named "y".
#
# This is classical SQL92 behavior.
#
do_test resolver01-1.1 {
  catchsql {
    CREATE TABLE t1(x, y); INSERT INTO t1 VALUES(11,22);
    CREATE TABLE t2(y, z); INSERT INTO t2 VALUES(33,44);
    SELECT 1 AS y FROM t1, t2 ORDER BY y;
  }
} {0 1}
do_test resolver01-1.2 {
  catchsql {
    SELECT 1 AS yy FROM t1, t2 ORDER BY y;
  }
} {1 {ambiguous column name: y}}
do_test resolver01-1.3 {
  catchsql {
    CREATE TABLE t3(x,y); INSERT INTO t3 VALUES(11,44),(33,22);
    SELECT x AS y FROM t3 ORDER BY y;
  }
} {0 {11 33}}
do_test resolver01-1.4 {
  catchsql {
    SELECT x AS yy FROM t3 ORDER BY y;
  }
} {0 {33 11}}

# SQLite allows the WHERE clause to reference output columns if there is
# no other way to resolve the name.
#
do_test resolver01-1.5 {
  catchsql {
    SELECT x AS yy FROM t3 ORDER BY yy;
  }
} {0 {11 33}}
do_test resolver01-1.6 {
  catchsql {
    SELECT x AS yy FROM t3 ORDER BY 1;
  }
} {0 {11 33}}

# The "ORDER BY y COLLATE nocase" form works the same as "ORDER BY y".
# The "y" binds more tightly to output columns than to input columns.
#
# This is for compatibility with SQL92 and with historical SQLite behavior.
# Note that PostgreSQL considers "y COLLATE nocase" to be an expression
# and thus PostgreSQL treats this case as if it where the 3.x case below.
#
do_test resolver01-2.1 {
  catchsql {
    SELECT 2 AS y FROM t1, t2 ORDER BY y COLLATE nocase;
  }
} {0 2}
do_test resolver01-2.2 {
  catchsql {
    SELECT 2 AS yy FROM t1, t2 ORDER BY y COLLATE nocase;
  }
} {1 {ambiguous column name: y}}
do_test resolver01-2.3 {
  catchsql {
    SELECT x AS y FROM t3 ORDER BY y COLLATE nocase;
  }
} {0 {11 33}}
do_test resolver01-2.4 {
  catchsql {
    SELECT x AS yy FROM t3 ORDER BY y COLLATE nocase;
  }
} {0 {33 11}}
do_test resolver01-2.5 {
  catchsql {
    SELECT x AS yy FROM t3 ORDER BY yy COLLATE nocase;
  }
} {0 {11 33}}
do_test resolver01-2.6 {
  catchsql {
    SELECT x AS yy FROM t3 ORDER BY 1 COLLATE nocase;
  }
} {0 {11 33}}

# But if the form is "ORDER BY expr" then bind more tightly to the
# the input column names and only use the output column names if no
# input column name matches.
#
# This is SQL99 behavior, as implemented by PostgreSQL and MS-SQL.
# Note that Oracle works differently.
#
do_test resolver01-3.1 {
  catchsql {
    SELECT 3 AS y FROM t1, t2 ORDER BY +y;
  }
} {1 {ambiguous column name: y}}
do_test resolver01-3.2 {
  catchsql {
    SELECT 2 AS yy FROM t1, t2 ORDER BY +y;
  }
} {1 {ambiguous column name: y}}
do_test resolver01-3.3 {
  catchsql {
    SELECT x AS y FROM t3 ORDER BY +y;
  }
} {0 {33 11}}
do_test resolver01-3.4 {
  catchsql {
    SELECT x AS yy FROM t3 ORDER BY +y;
  }
} {0 {33 11}}
do_test resolver01-3.5 {
  catchsql {
    SELECT x AS yy FROM t3 ORDER BY +yy
  }
} {0 {11 33}}

# This is the test case given in ticket [f617ea3125e9] (with table name
# changed from "t1" to "t4".  The behavior of (1) and (3) match with
# PostgreSQL, but we intentionally break with PostgreSQL to provide
# SQL92 behavior for case (2).
#
do_execsql_test resolver01-4.1 {
  CREATE TABLE t4(m CHAR(2));
  INSERT INTO t4 VALUES('az');
  INSERT INTO t4 VALUES('by');
  INSERT INTO t4 VALUES('cx');
  SELECT '1', substr(m,2) AS m FROM t4 ORDER BY m;
  SELECT '2', substr(m,2) AS m FROM t4 ORDER BY m COLLATE binary;
  SELECT '3', substr(m,2) AS m FROM t4 ORDER BY lower(m);
} {1 x 1 y 1 z 2 x 2 y 2 z 3 z 3 y 3 x}

finish_test

Changes to test/tkt2822.test.

205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
    SELECT a AS "b" FROM t3 ORDER BY [B];
  }
} {1 9}

# In "ORDER BY +b" the term is now an expression rather than
# a label.  It therefore matches by rule (3) instead of rule (2).
#
# 2013-04-13:  This is busted.  Changed to conform to PostgreSQL and
# MySQL and Oracle behavior.
# 
do_test tkt2822-5.5 {
  execsql {
    SELECT a AS b FROM t3 ORDER BY +b;
  }
} {1 9}

# Tests for rule 2 in compound queries
#
do_test tkt2822-6.1 {
  execsql {
    CREATE TABLE t6a(p,q);
    INSERT INTO t6a VALUES(1,8);







<
<
<




|







205
206
207
208
209
210
211



212
213
214
215
216
217
218
219
220
221
222
223
    SELECT a AS "b" FROM t3 ORDER BY [B];
  }
} {1 9}

# In "ORDER BY +b" the term is now an expression rather than
# a label.  It therefore matches by rule (3) instead of rule (2).
# 



do_test tkt2822-5.5 {
  execsql {
    SELECT a AS b FROM t3 ORDER BY +b;
  }
} {9 1}

# Tests for rule 2 in compound queries
#
do_test tkt2822-6.1 {
  execsql {
    CREATE TABLE t6a(p,q);
    INSERT INTO t6a VALUES(1,8);