SQLite

Check-in [e563312347]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Where possible, transform the DISTINCT qualifier to a GROUP BY clause. GROUP BY clauses may be optimized by indices, DISTINCT qualifiers cannot. (CVS 4538)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: e56331234791cf3d830a30e4cfa66682bdf2eed1
User & Date: danielk1977 2007-11-12 15:29:19.000
Context
2007-11-12
15:40
Fix a code comment in select.c. No code changes. (CVS 4539) (check-in: cf41d6a00f user: danielk1977 tags: trunk)
15:29
Where possible, transform the DISTINCT qualifier to a GROUP BY clause. GROUP BY clauses may be optimized by indices, DISTINCT qualifiers cannot. (CVS 4538) (check-in: e563312347 user: danielk1977 tags: trunk)
09:50
Allow collation sequence names to be quoted. Ticket #2744. (CVS 4537) (check-in: 06749ac7af user: danielk1977 tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/select.c.
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains C code routines that are called by the parser
** to handle SELECT statements in SQLite.
**
** $Id: select.c,v 1.359 2007/08/31 17:42:48 danielk1977 Exp $
*/
#include "sqliteInt.h"


/*
** Delete all the content of a Select structure but do not deallocate
** the select structure itself.







|







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains C code routines that are called by the parser
** to handle SELECT statements in SQLite.
**
** $Id: select.c,v 1.360 2007/11/12 15:29:19 danielk1977 Exp $
*/
#include "sqliteInt.h"


/*
** Delete all the content of a Select structure but do not deallocate
** the select structure itself.
3062
3063
3064
3065
3066
3067
3068









3069
3070
3071
3072
3073
3074
3075
#ifndef SQLITE_OMIT_VIEW
  if( pParent && pParentAgg &&
      flattenSubquery(db, pParent, parentTab, *pParentAgg, isAgg) ){
    if( isAgg ) *pParentAgg = 1;
    goto select_end;
  }
#endif










  /* If there is an ORDER BY clause, then this sorting
  ** index might end up being unused if the data can be 
  ** extracted in pre-sorted order.  If that is the case, then the
  ** OP_OpenEphemeral instruction will be changed to an OP_Noop once
  ** we figure out that the sorting index is not needed.  The addrSortIndex
  ** variable is used to facilitate that change.







>
>
>
>
>
>
>
>
>







3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
#ifndef SQLITE_OMIT_VIEW
  if( pParent && pParentAgg &&
      flattenSubquery(db, pParent, parentTab, *pParentAgg, isAgg) ){
    if( isAgg ) *pParentAgg = 1;
    goto select_end;
  }
#endif

  /* If possible, rewrite the query to use GROUP BY instead of 
  */
  if( p->isDistinct && !p->isAgg && !p->pGroupBy ){
    p->pGroupBy = sqlite3ExprListDup(db, p->pEList);
    pGroupBy = p->pGroupBy;
    p->isDistinct = 0;
    isDistinct = 0;
  }

  /* If there is an ORDER BY clause, then this sorting
  ** index might end up being unused if the data can be 
  ** extracted in pre-sorted order.  If that is the case, then the
  ** OP_OpenEphemeral instruction will be changed to an OP_Noop once
  ** we figure out that the sorting index is not needed.  The addrSortIndex
  ** variable is used to facilitate that change.
3098
3099
3100
3101
3102
3103
3104

3105
3106
3107
3108
3109
3110
3111
  iEnd = sqlite3VdbeMakeLabel(v);
  computeLimitRegisters(pParse, p, iEnd);

  /* Open a virtual index to use for the distinct set.
  */
  if( isDistinct ){
    KeyInfo *pKeyInfo;

    distinct = pParse->nTab++;
    pKeyInfo = keyInfoFromExprList(pParse, p->pEList);
    sqlite3VdbeOp3(v, OP_OpenEphemeral, distinct, 0, 
                        (char*)pKeyInfo, P3_KEYINFO_HANDOFF);
  }else{
    distinct = -1;
  }







>







3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
  iEnd = sqlite3VdbeMakeLabel(v);
  computeLimitRegisters(pParse, p, iEnd);

  /* Open a virtual index to use for the distinct set.
  */
  if( isDistinct ){
    KeyInfo *pKeyInfo;
    assert( isAgg || pGroupBy );
    distinct = pParse->nTab++;
    pKeyInfo = keyInfoFromExprList(pParse, p->pEList);
    sqlite3VdbeOp3(v, OP_OpenEphemeral, distinct, 0, 
                        (char*)pKeyInfo, P3_KEYINFO_HANDOFF);
  }else{
    distinct = -1;
  }
3125
3126
3127
3128
3129
3130
3131

3132
3133
3134
3135
3136
3137
3138
3139
    if( addrSortIndex>=0 && pOrderBy==0 ){
      sqlite3VdbeChangeToNoop(v, addrSortIndex, 1);
      p->addrOpenEphm[2] = -1;
    }

    /* Use the standard inner loop
    */

    if( selectInnerLoop(pParse, p, pEList, 0, 0, pOrderBy, distinct, eDest,
                    iParm, pWInfo->iContinue, pWInfo->iBreak, aff) ){
       goto select_end;
    }

    /* End the database scan loop.
    */
    sqlite3WhereEnd(pWInfo);







>
|







3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
    if( addrSortIndex>=0 && pOrderBy==0 ){
      sqlite3VdbeChangeToNoop(v, addrSortIndex, 1);
      p->addrOpenEphm[2] = -1;
    }

    /* Use the standard inner loop
    */
    assert(!isDistinct);
    if( selectInnerLoop(pParse, p, pEList, 0, 0, pOrderBy, -1, eDest,
                    iParm, pWInfo->iContinue, pWInfo->iBreak, aff) ){
       goto select_end;
    }

    /* End the database scan loop.
    */
    sqlite3WhereEnd(pWInfo);
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
      if( sqlite3ExprAnalyzeAggList(&sNC, sAggInfo.aFunc[i].pExpr->pList) ){
        goto select_end;
      }
    }
    if( db->mallocFailed ) goto select_end;

    /* Processing for aggregates with GROUP BY is very different and
    ** much more complex tha aggregates without a GROUP BY.
    */
    if( pGroupBy ){
      KeyInfo *pKeyInfo;  /* Keying information for the group by clause */

      /* Create labels that we will be needing
      */
     







|







3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
      if( sqlite3ExprAnalyzeAggList(&sNC, sAggInfo.aFunc[i].pExpr->pList) ){
        goto select_end;
      }
    }
    if( db->mallocFailed ) goto select_end;

    /* Processing for aggregates with GROUP BY is very different and
    ** much more complex than aggregates without a GROUP BY.
    */
    if( pGroupBy ){
      KeyInfo *pKeyInfo;  /* Keying information for the group by clause */

      /* Create labels that we will be needing
      */
     
Changes to test/collate5.test.
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this file is testing DISTINCT, UNION, INTERSECT and EXCEPT
# SELECT statements that use user-defined collation sequences. Also
# GROUP BY clauses that use user-defined collation sequences.
#
# $Id: collate5.test,v 1.5 2005/09/07 22:48:16 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl


#
# Tests are organised as follows:







|







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this file is testing DISTINCT, UNION, INTERSECT and EXCEPT
# SELECT statements that use user-defined collation sequences. Also
# GROUP BY clauses that use user-defined collation sequences.
#
# $Id: collate5.test,v 1.6 2007/11/12 15:29:19 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl


#
# Tests are organised as follows:
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
    INSERT INTO collate5t1 VALUES('N', NULL);
  } 
} {}
do_test collate5-1.1 {
  execsql {
    SELECT DISTINCT a FROM collate5t1;
  }
} {a b n}
do_test collate5-1.2 {
  execsql {
    SELECT DISTINCT b FROM collate5t1;
  }
} {apple Apple banana {}}
do_test collate5-1.3 {
  execsql {
    SELECT DISTINCT a, b FROM collate5t1;
  }
} {a apple A Apple b banana n {}}

# The remainder of this file tests compound SELECT statements.
# Omit it if the library is compiled such that they are omitted.
#
ifcapable !compound {
  finish_test
  return







|




|




|







53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
    INSERT INTO collate5t1 VALUES('N', NULL);
  } 
} {}
do_test collate5-1.1 {
  execsql {
    SELECT DISTINCT a FROM collate5t1;
  }
} {A B N}
do_test collate5-1.2 {
  execsql {
    SELECT DISTINCT b FROM collate5t1;
  }
} {{} Apple apple banana}
do_test collate5-1.3 {
  execsql {
    SELECT DISTINCT a, b FROM collate5t1;
  }
} {A Apple a apple B banana N {}}

# The remainder of this file tests compound SELECT statements.
# Omit it if the library is compiled such that they are omitted.
#
ifcapable !compound {
  finish_test
  return
Changes to test/insert4.test.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# 2007 January 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this file is testing the INSERT transfer optimization.
#
# $Id: insert4.test,v 1.8 2007/10/09 08:29:32 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

ifcapable !view||!subquery {
  finish_test
  return













|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# 2007 January 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this file is testing the INSERT transfer optimization.
#
# $Id: insert4.test,v 1.9 2007/11/12 15:29:19 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

ifcapable !view||!subquery {
  finish_test
  return
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#
do_test insert4-2.4.1 {
  execsql {
    DELETE FROM t3;
    INSERT INTO t3 SELECT DISTINCT * FROM t2;
    SELECT * FROM t3;
  }
} {9 1 1 9}
xferopt_test insert4-2.4.2 0
do_test insert4-2.4.3 {
  catchsql {
    DELETE FROM t1;
    INSERT INTO t1 SELECT DISTINCT * FROM t2;
  }
} {1 {constraint failed}}







|







108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#
do_test insert4-2.4.1 {
  execsql {
    DELETE FROM t3;
    INSERT INTO t3 SELECT DISTINCT * FROM t2;
    SELECT * FROM t3;
  }
} {1 9 9 1}
xferopt_test insert4-2.4.2 0
do_test insert4-2.4.3 {
  catchsql {
    DELETE FROM t1;
    INSERT INTO t1 SELECT DISTINCT * FROM t2;
  }
} {1 {constraint failed}}
Changes to test/misc5.test.
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#
#***********************************************************************
# This file implements regression tests for SQLite library.
#
# This file implements tests for miscellanous features that were
# left out of other test files.
#
# $Id: misc5.test,v 1.17 2007/09/12 17:01:45 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Build records using the MakeRecord opcode such that the size of the 
# header is at the transition point in the size of a varint.
#







|







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#
#***********************************************************************
# This file implements regression tests for SQLite library.
#
# This file implements tests for miscellanous features that were
# left out of other test files.
#
# $Id: misc5.test,v 1.18 2007/11/12 15:29:19 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Build records using the MakeRecord opcode such that the size of the 
# header is at the transition point in the size of a varint.
#
477
478
479
480
481
482
483




484
485






486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
      FROM (    
       SELECT DISTINCT artist    
       FROM songs      
       WHERE songid IN (    
        SELECT songid    
        FROM songs    
        WHERE LOWER(artist) = (    




          SELECT DISTINCT LOWER(artist)    
          FROM (      






            SELECT DISTINCT artist,sum(timesplayed) AS total      
            FROM songs      
            GROUP BY LOWER(artist)      
            ORDER BY total DESC      
            LIMIT 10    
          )    
          WHERE artist <> '' 
        )  
       )       
      )  
      ORDER BY LOWER(artist) ASC;
    }
  } {two}
}

# Ticket #1370.  Do not overwrite small files (less than 1024 bytes)
# when trying to open them as a database.
#
do_test misc5-4.1 {
  db close







>
>
>
>


>
>
>
>
>
>












|







477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
      FROM (    
       SELECT DISTINCT artist    
       FROM songs      
       WHERE songid IN (    
        SELECT songid    
        FROM songs    
        WHERE LOWER(artist) = (    
          -- This sub-query is indeterminate. Because there is no ORDER BY,
          -- it may return 'one', 'two' or 'three'. Because of this, the
	  -- outermost parent query may correctly return any of 'one', 'two' 
          -- or 'three' as well.
          SELECT DISTINCT LOWER(artist)    
          FROM (      
            -- This sub-query returns the table:
            --
            --     two      14
            --     one      10
            --     three    5
            --
            SELECT DISTINCT artist,sum(timesplayed) AS total      
            FROM songs      
            GROUP BY LOWER(artist)      
            ORDER BY total DESC      
            LIMIT 10    
          )    
          WHERE artist <> '' 
        )  
       )       
      )  
      ORDER BY LOWER(artist) ASC;
    }
  } {one}
}

# Ticket #1370.  Do not overwrite small files (less than 1024 bytes)
# when trying to open them as a database.
#
do_test misc5-4.1 {
  db close