/ Check-in [7b8548b1]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Do not consider a DISTINCT clause redundant unless a subset of the result-set is collectively subject to a UNIQUE constraint and it can be guaranteed that all columns of the subset are NOT NULL (either due to NOT NULL constraints WHERE clause terms). Fix for [385a5b56b9].
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 7b8548b1872cc1225355ba8311e93dd08d6526e2
User & Date: dan 2012-04-20 16:59:24
References
2012-04-20
17:44 Closed ticket [385a5b56]: A DISTINCT SELECT optimized using a UNIQUE index may allow duplicate NULL values. plus 3 other changes artifact: 40ce6655 user: dan
Context
2012-04-21
11:33
If terminating interactive input to the command-line shell with ^D, issue an extra \n to move the cursor to the next line before exiting. This check-in also accidently adds the test_spellfix.c file to the source tree. check-in: feff1ef0 user: drh tags: trunk
00:31
Merge the latest trunk changes into the WinRT branch (fixes for tickets [2a5629202f] and [385a5b56b9]). check-in: 25478dcf user: mistachkin tags: winrt
2012-04-20
16:59
Do not consider a DISTINCT clause redundant unless a subset of the result-set is collectively subject to a UNIQUE constraint and it can be guaranteed that all columns of the subset are NOT NULL (either due to NOT NULL constraints WHERE clause terms). Fix for [385a5b56b9]. check-in: 7b8548b1 user: dan tags: trunk
15:24
Fix for [2a5629202f]. When considering whether or not a UNIQUE index may be used to optimize an ORDER BY clause, do not assume that all index entries are distinct unless there is some reason to believe that the index contains no NULL values. check-in: 9870e4c4 user: dan tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/where.c.

1558
1559
1560
1561
1562
1563
1564



1565
1566
1567
1568
1569
1570
1571
1572
1573

1574
1575
1576
1577
1578
1579
1580
....
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
  **
  **   1. The index is itself UNIQUE, and
  **
  **   2. All of the columns in the index are either part of the pDistinct
  **      list, or else the WHERE clause contains a term of the form "col=X",
  **      where X is a constant value. The collation sequences of the
  **      comparison and select-list expressions must match those of the index.



  */
  for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
    if( pIdx->onError==OE_None ) continue;
    for(i=0; i<pIdx->nColumn; i++){
      int iCol = pIdx->aiColumn[i];
      if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) 
       && 0>findIndexCol(pParse, pDistinct, iBase, pIdx, i)
      ){
        break;

      }
    }
    if( i==pIdx->nColumn ){
      /* This index implies that the DISTINCT qualifier is redundant. */
      return 1;
    }
  }
................................................................................
    ** NULL (since the corresponding "=" operator in the WHERE clause would 
    ** not be true). So if all remaining index columns have NOT NULL 
    ** constaints attached to them, we can be confident that the visited
    ** index entries are free of NULLs.  */
    for(i=nEqCol; i<pIdx->nColumn; i++){
      if( aCol[pIdx->aiColumn[i]].notNull==0 ) break;
    }
    return (i>=pIdx->nColumn);
  }
  return 0;
}

/*
** Prepare a crude estimate of the logarithm of the input value.
** The results need not be exact.  This is only used for estimating







>
>
>





|
|
|
|
>







 







|







1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
....
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
  **
  **   1. The index is itself UNIQUE, and
  **
  **   2. All of the columns in the index are either part of the pDistinct
  **      list, or else the WHERE clause contains a term of the form "col=X",
  **      where X is a constant value. The collation sequences of the
  **      comparison and select-list expressions must match those of the index.
  **
  **   3. All of those index columns for which the WHERE clause does not
  **      contain a "col=X" term are subject to a NOT NULL constraint.
  */
  for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
    if( pIdx->onError==OE_None ) continue;
    for(i=0; i<pIdx->nColumn; i++){
      int iCol = pIdx->aiColumn[i];
      if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) ){
        int iIdxCol = findIndexCol(pParse, pDistinct, iBase, pIdx, i);
        if( iIdxCol<0 || pTab->aCol[pIdx->aiColumn[i]].notNull==0 ){
          break;
        }
      }
    }
    if( i==pIdx->nColumn ){
      /* This index implies that the DISTINCT qualifier is redundant. */
      return 1;
    }
  }
................................................................................
    ** NULL (since the corresponding "=" operator in the WHERE clause would 
    ** not be true). So if all remaining index columns have NOT NULL 
    ** constaints attached to them, we can be confident that the visited
    ** index entries are free of NULLs.  */
    for(i=nEqCol; i<pIdx->nColumn; i++){
      if( aCol[pIdx->aiColumn[i]].notNull==0 ) break;
    }
    return (i==pIdx->nColumn);
  }
  return 0;
}

/*
** Prepare a crude estimate of the logarithm of the input value.
** The results need not be exact.  This is only used for estimating

Changes to test/distinct.test.

73
74
75
76
77
78
79
80
81




82
83
84

85
86

87
88
89
90
91
92

93
94
95
96
97
98
99
100
101

102


103

104

105
106
107
108
109
110
111
112
...
116
117
118
119
120
121
122
123

124
125
126
127
128
129
130
do_execsql_test 1.0 {
  CREATE TABLE t1(a, b, c, d);
  CREATE UNIQUE INDEX i1 ON t1(b, c);
  CREATE UNIQUE INDEX i2 ON t1(d COLLATE nocase);

  CREATE TABLE t2(x INTEGER PRIMARY KEY, y);

  CREATE TABLE t3(c1 PRIMARY KEY, c2);
  CREATE INDEX i3 ON t3(c2);




}
foreach {tn noop sql} {


  1   1   "SELECT DISTINCT b, c FROM t1"
  2   1   "SELECT DISTINCT c FROM t1 WHERE b = ?"

  3   1   "SELECT DISTINCT rowid FROM t1"
  4   1   "SELECT DISTINCT rowid, a FROM t1"
  5   1   "SELECT DISTINCT x FROM t2"
  6   1   "SELECT DISTINCT * FROM t2"
  7   1   "SELECT DISTINCT * FROM (SELECT * FROM t2)"


  8   1   "SELECT DISTINCT * FROM t1"

  8   0   "SELECT DISTINCT a, b FROM t1"

  9   0   "SELECT DISTINCT c FROM t1 WHERE b IN (1,2)"
  10  0   "SELECT DISTINCT c FROM t1"
  11  0   "SELECT DISTINCT b FROM t1"

  12  0   "SELECT DISTINCT a, d FROM t1"

  13  0   "SELECT DISTINCT a, b, c COLLATE nocase FROM t1"


  14  1   "SELECT DISTINCT a, d COLLATE nocase FROM t1"

  15  0   "SELECT DISTINCT a, d COLLATE binary FROM t1"

  16  1   "SELECT DISTINCT a, b, c COLLATE binary FROM t1"

  16  0   "SELECT DISTINCT t1.rowid FROM t1, t2"
  17  0   { /* Technically, it would be possible to detect that DISTINCT
            ** is a no-op in cases like the following. But SQLite does not
            ** do so. */
            SELECT DISTINCT t1.rowid FROM t1, t2 WHERE t1.rowid=t2.rowid }

................................................................................
  21  0   "SELECT DISTINCT c2 FROM t3"

  22  0   "SELECT DISTINCT * FROM (SELECT 1, 2, 3 UNION SELECT 4, 5, 6)"
  23  1   "SELECT DISTINCT rowid FROM (SELECT 1, 2, 3 UNION SELECT 4, 5, 6)"

  24  0   "SELECT DISTINCT rowid/2 FROM t1"
  25  1   "SELECT DISTINCT rowid/2, rowid FROM t1"
  26  1   "SELECT DISTINCT rowid/2, b FROM t1 WHERE c = ?"

} {
  if {$noop} {
    do_distinct_noop_test 1.$tn $sql
  } else {
    do_distinct_not_noop_test 1.$tn $sql
  }
}







|

>
>
>
>



>
|
|
>






>
|







|
>
|
>
>
|
>
|
>
|







 







|
>







73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
...
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
do_execsql_test 1.0 {
  CREATE TABLE t1(a, b, c, d);
  CREATE UNIQUE INDEX i1 ON t1(b, c);
  CREATE UNIQUE INDEX i2 ON t1(d COLLATE nocase);

  CREATE TABLE t2(x INTEGER PRIMARY KEY, y);

  CREATE TABLE t3(c1 PRIMARY KEY NOT NULL, c2 NOT NULL);
  CREATE INDEX i3 ON t3(c2);

  CREATE TABLE t4(a, b NOT NULL, c NOT NULL, d NOT NULL);
  CREATE UNIQUE INDEX t4i1 ON t4(b, c);
  CREATE UNIQUE INDEX t4i2 ON t4(d COLLATE nocase);
}
foreach {tn noop sql} {

  1.1 0   "SELECT DISTINCT b, c FROM t1"
  1.2 1   "SELECT DISTINCT b, c FROM t4"
  2.1 0   "SELECT DISTINCT c FROM t1 WHERE b = ?"
  2.2 1   "SELECT DISTINCT c FROM t4 WHERE b = ?"
  3   1   "SELECT DISTINCT rowid FROM t1"
  4   1   "SELECT DISTINCT rowid, a FROM t1"
  5   1   "SELECT DISTINCT x FROM t2"
  6   1   "SELECT DISTINCT * FROM t2"
  7   1   "SELECT DISTINCT * FROM (SELECT * FROM t2)"

  8.1 0   "SELECT DISTINCT * FROM t1"
  8.2 1   "SELECT DISTINCT * FROM t4"

  8   0   "SELECT DISTINCT a, b FROM t1"

  9   0   "SELECT DISTINCT c FROM t1 WHERE b IN (1,2)"
  10  0   "SELECT DISTINCT c FROM t1"
  11  0   "SELECT DISTINCT b FROM t1"

  12.1 0   "SELECT DISTINCT a, d FROM t1"
  12.2 0   "SELECT DISTINCT a, d FROM t4"
  13.1 0   "SELECT DISTINCT a, b, c COLLATE nocase FROM t1"
  13.2 0   "SELECT DISTINCT a, b, c COLLATE nocase FROM t4"
  14.1 0   "SELECT DISTINCT a, d COLLATE nocase FROM t1"
  14.2 1   "SELECT DISTINCT a, d COLLATE nocase FROM t4"

  15   0   "SELECT DISTINCT a, d COLLATE binary FROM t1"
  16.1 0   "SELECT DISTINCT a, b, c COLLATE binary FROM t1"
  16.2 1   "SELECT DISTINCT a, b, c COLLATE binary FROM t4"

  16  0   "SELECT DISTINCT t1.rowid FROM t1, t2"
  17  0   { /* Technically, it would be possible to detect that DISTINCT
            ** is a no-op in cases like the following. But SQLite does not
            ** do so. */
            SELECT DISTINCT t1.rowid FROM t1, t2 WHERE t1.rowid=t2.rowid }

................................................................................
  21  0   "SELECT DISTINCT c2 FROM t3"

  22  0   "SELECT DISTINCT * FROM (SELECT 1, 2, 3 UNION SELECT 4, 5, 6)"
  23  1   "SELECT DISTINCT rowid FROM (SELECT 1, 2, 3 UNION SELECT 4, 5, 6)"

  24  0   "SELECT DISTINCT rowid/2 FROM t1"
  25  1   "SELECT DISTINCT rowid/2, rowid FROM t1"
  26.1  0   "SELECT DISTINCT rowid/2, b FROM t1 WHERE c = ?"
  26.2  1   "SELECT DISTINCT rowid/2, b FROM t4 WHERE c = ?"
} {
  if {$noop} {
    do_distinct_noop_test 1.$tn $sql
  } else {
    do_distinct_not_noop_test 1.$tn $sql
  }
}

Added test/tkt-385a5b56b9.test.













































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# 2012 April 02
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# The tests in this file were used while developing the SQLite 4 code. 
#
set testdir [file dirname $argv0]
source $testdir/tester.tcl
set testprefix tkt-385a5b56b9

do_execsql_test 1.0 { 
  CREATE TABLE t1(x, y);
  INSERT INTO t1 VALUES(1, NULL);
  INSERT INTO t1 VALUES(2, NULL);
  INSERT INTO t1 VALUES(1, NULL);
}

do_execsql_test 1.1 { SELECT DISTINCT x, y FROM t1 } {1 {} 2 {}}
do_execsql_test 1.2 { CREATE UNIQUE INDEX i1 ON t1(x, y) }
do_execsql_test 1.3 { SELECT DISTINCT x, y FROM t1 } {1 {} 2 {}}


#-------------------------------------------------------------------------

do_execsql_test 2.0 {
  CREATE TABLE t2(x, y NOT NULL);
  CREATE UNIQUE INDEX t2x ON t2(x);
  CREATE UNIQUE INDEX t2y ON t2(y);
}

do_eqp_test 2.1 { SELECT DISTINCT x FROM t2 } {
  0 0 0 {SCAN TABLE t2 USING COVERING INDEX t2x (~1000000 rows)}
}

do_eqp_test 2.2 { SELECT DISTINCT y FROM t2 } {
  0 0 0 {SCAN TABLE t2 (~1000000 rows)}
}

do_eqp_test 2.3 { SELECT DISTINCT x, y FROM t2 WHERE y=10 } {
  0 0 0 {SEARCH TABLE t2 USING INDEX t2y (y=?) (~1 rows)}
}

do_eqp_test 2.4 { SELECT DISTINCT x, y FROM t2 WHERE x=10 } {
  0 0 0 {SEARCH TABLE t2 USING INDEX t2x (x=?) (~1 rows)}
}

finish_test