/ Check-in [9f932655]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Enhance the LIKE/GLOB query optimization so that it works as long as there is an index with the appropriate collating sequence and even if the default collating sequence of the column is different. Ticket [4711020446da7d93d99].
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:9f932655f9eb9fdab16d7deed98b7cad414e0ca6
User & Date: drh 2010-07-22 17:49:53
Context
2010-07-22
17:55
Make the sqlite3_db_status() routine threadsafe. check-in: 241f7bd1 user: dan tags: trunk
17:49
Enhance the LIKE/GLOB query optimization so that it works as long as there is an index with the appropriate collating sequence and even if the default collating sequence of the column is different. Ticket [4711020446da7d93d99]. check-in: 9f932655 user: drh tags: trunk
15:44
Add test/threadtest3.c, containing multi-thread tests implemented in C. check-in: aad88cf5 user: dan tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/expr.c.

50
51
52
53
54
55
56












57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76

77
78
79
80
81
82
83
    int j = pExpr->iColumn;
    if( j<0 ) return SQLITE_AFF_INTEGER;
    assert( pExpr->pTab && j<pExpr->pTab->nCol );
    return pExpr->pTab->aCol[j].affinity;
  }
  return pExpr->affinity;
}













/*
** Set the collating sequence for expression pExpr to be the collating
** sequence named by pToken.   Return a pointer to the revised expression.
** The collating sequence is marked as "explicit" using the EP_ExpCollate
** flag.  An explicit collating sequence will override implicit
** collating sequences.
*/
Expr *sqlite3ExprSetColl(Parse *pParse, Expr *pExpr, Token *pCollName){
  char *zColl = 0;            /* Dequoted name of collation sequence */
  CollSeq *pColl;
  sqlite3 *db = pParse->db;
  zColl = sqlite3NameFromToken(db, pCollName);
  if( pExpr && zColl ){
    pColl = sqlite3LocateCollSeq(pParse, zColl);
    if( pColl ){
      pExpr->pColl = pColl;
      pExpr->flags |= EP_ExpCollate;
    }
  }

  sqlite3DbFree(db, zColl);
  return pExpr;
}

/*
** Return the default collation sequence for the expression pExpr. If
** there is no default collation type, return 0.







>
>
>
>
>
>
>
>
>
>
>
>








|




<
|
<
<
<
<
<
>







50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81

82





83
84
85
86
87
88
89
90
    int j = pExpr->iColumn;
    if( j<0 ) return SQLITE_AFF_INTEGER;
    assert( pExpr->pTab && j<pExpr->pTab->nCol );
    return pExpr->pTab->aCol[j].affinity;
  }
  return pExpr->affinity;
}

/*
** Set the explicit collating sequence for an expression to the
** collating sequence supplied in the second argument.
*/
Expr *sqlite3ExprSetColl(Expr *pExpr, CollSeq *pColl){
  if( pExpr && pColl ){
    pExpr->pColl = pColl;
    pExpr->flags |= EP_ExpCollate;
  }
  return pExpr;
}

/*
** Set the collating sequence for expression pExpr to be the collating
** sequence named by pToken.   Return a pointer to the revised expression.
** The collating sequence is marked as "explicit" using the EP_ExpCollate
** flag.  An explicit collating sequence will override implicit
** collating sequences.
*/
Expr *sqlite3ExprSetCollByToken(Parse *pParse, Expr *pExpr, Token *pCollName){
  char *zColl = 0;            /* Dequoted name of collation sequence */
  CollSeq *pColl;
  sqlite3 *db = pParse->db;
  zColl = sqlite3NameFromToken(db, pCollName);

  pColl = sqlite3LocateCollSeq(pParse, zColl);





  sqlite3ExprSetColl(pExpr, pColl);
  sqlite3DbFree(db, zColl);
  return pExpr;
}

/*
** Return the default collation sequence for the expression pExpr. If
** there is no default collation type, return 0.

Changes to src/parse.y.

776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
....
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
}
expr(A) ::= VARIABLE(X).     {
  spanExpr(&A, pParse, TK_VARIABLE, &X);
  sqlite3ExprAssignVarNumber(pParse, A.pExpr);
  spanSet(&A, &X, &X);
}
expr(A) ::= expr(E) COLLATE ids(C). {
  A.pExpr = sqlite3ExprSetColl(pParse, E.pExpr, &C);
  A.zStart = E.zStart;
  A.zEnd = &C.z[C.n];
}
%ifndef SQLITE_OMIT_CAST
expr(A) ::= CAST(X) LP expr(E) AS typetoken(T) RP(Y). {
  A.pExpr = sqlite3PExpr(pParse, TK_CAST, E.pExpr, 0, &T);
  spanSet(&A,&X,&Y);
................................................................................

idxlist_opt(A) ::= .                         {A = 0;}
idxlist_opt(A) ::= LP idxlist(X) RP.         {A = X;}
idxlist(A) ::= idxlist(X) COMMA nm(Y) collate(C) sortorder(Z).  {
  Expr *p = 0;
  if( C.n>0 ){
    p = sqlite3Expr(pParse->db, TK_COLUMN, 0);
    sqlite3ExprSetColl(pParse, p, &C);
  }
  A = sqlite3ExprListAppend(pParse,X, p);
  sqlite3ExprListSetName(pParse,A,&Y,1);
  sqlite3ExprListCheckLength(pParse, A, "index");
  if( A ) A->a[A->nExpr-1].sortOrder = (u8)Z;
}
idxlist(A) ::= nm(Y) collate(C) sortorder(Z). {
  Expr *p = 0;
  if( C.n>0 ){
    p = sqlite3PExpr(pParse, TK_COLUMN, 0, 0, 0);
    sqlite3ExprSetColl(pParse, p, &C);
  }
  A = sqlite3ExprListAppend(pParse,0, p);
  sqlite3ExprListSetName(pParse, A, &Y, 1);
  sqlite3ExprListCheckLength(pParse, A, "index");
  if( A ) A->a[A->nExpr-1].sortOrder = (u8)Z;
}








|







 







|










|







776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
....
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
}
expr(A) ::= VARIABLE(X).     {
  spanExpr(&A, pParse, TK_VARIABLE, &X);
  sqlite3ExprAssignVarNumber(pParse, A.pExpr);
  spanSet(&A, &X, &X);
}
expr(A) ::= expr(E) COLLATE ids(C). {
  A.pExpr = sqlite3ExprSetCollByToken(pParse, E.pExpr, &C);
  A.zStart = E.zStart;
  A.zEnd = &C.z[C.n];
}
%ifndef SQLITE_OMIT_CAST
expr(A) ::= CAST(X) LP expr(E) AS typetoken(T) RP(Y). {
  A.pExpr = sqlite3PExpr(pParse, TK_CAST, E.pExpr, 0, &T);
  spanSet(&A,&X,&Y);
................................................................................

idxlist_opt(A) ::= .                         {A = 0;}
idxlist_opt(A) ::= LP idxlist(X) RP.         {A = X;}
idxlist(A) ::= idxlist(X) COMMA nm(Y) collate(C) sortorder(Z).  {
  Expr *p = 0;
  if( C.n>0 ){
    p = sqlite3Expr(pParse->db, TK_COLUMN, 0);
    sqlite3ExprSetCollByToken(pParse, p, &C);
  }
  A = sqlite3ExprListAppend(pParse,X, p);
  sqlite3ExprListSetName(pParse,A,&Y,1);
  sqlite3ExprListCheckLength(pParse, A, "index");
  if( A ) A->a[A->nExpr-1].sortOrder = (u8)Z;
}
idxlist(A) ::= nm(Y) collate(C) sortorder(Z). {
  Expr *p = 0;
  if( C.n>0 ){
    p = sqlite3PExpr(pParse, TK_COLUMN, 0, 0, 0);
    sqlite3ExprSetCollByToken(pParse, p, &C);
  }
  A = sqlite3ExprListAppend(pParse,0, p);
  sqlite3ExprListSetName(pParse, A, &Y, 1);
  sqlite3ExprListCheckLength(pParse, A, "index");
  if( A ) A->a[A->nExpr-1].sortOrder = (u8)Z;
}

Changes to src/sqliteInt.h.

2865
2866
2867
2868
2869
2870
2871
2872

2873
2874
2875
2876
2877
2878
2879
void *sqlite3HexToBlob(sqlite3*, const char *z, int n);
int sqlite3TwoPartName(Parse *, Token *, Token *, Token **);
const char *sqlite3ErrStr(int);
int sqlite3ReadSchema(Parse *pParse);
CollSeq *sqlite3FindCollSeq(sqlite3*,u8 enc, const char*,int);
CollSeq *sqlite3LocateCollSeq(Parse *pParse, const char*zName);
CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr);
Expr *sqlite3ExprSetColl(Parse *pParse, Expr *, Token *);

int sqlite3CheckCollSeq(Parse *, CollSeq *);
int sqlite3CheckObjectName(Parse *, const char *);
void sqlite3VdbeSetChanges(sqlite3 *, int);

const void *sqlite3ValueText(sqlite3_value*, u8);
int sqlite3ValueBytes(sqlite3_value*, u8);
void sqlite3ValueSetStr(sqlite3_value*, int, const void *,u8, 







|
>







2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
void *sqlite3HexToBlob(sqlite3*, const char *z, int n);
int sqlite3TwoPartName(Parse *, Token *, Token *, Token **);
const char *sqlite3ErrStr(int);
int sqlite3ReadSchema(Parse *pParse);
CollSeq *sqlite3FindCollSeq(sqlite3*,u8 enc, const char*,int);
CollSeq *sqlite3LocateCollSeq(Parse *pParse, const char*zName);
CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr);
Expr *sqlite3ExprSetColl(Expr*, CollSeq*);
Expr *sqlite3ExprSetCollByToken(Parse *pParse, Expr*, Token*);
int sqlite3CheckCollSeq(Parse *, CollSeq *);
int sqlite3CheckObjectName(Parse *, const char *);
void sqlite3VdbeSetChanges(sqlite3 *, int);

const void *sqlite3ValueText(sqlite3_value*, u8);
int sqlite3ValueBytes(sqlite3_value*, u8);
void sqlite3ValueSetStr(sqlite3_value*, int, const void *,u8, 

Changes to src/where.c.

631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
...
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
....
1242
1243
1244
1245
1246
1247
1248

1249
1250
1251
1252
1253
1254
1255
....
1262
1263
1264
1265
1266
1267
1268

1269


1270
1271
1272
1273


1274
1275
1276
1277
1278
1279
1280
){
  const char *z = 0;         /* String on RHS of LIKE operator */
  Expr *pRight, *pLeft;      /* Right and left size of LIKE operator */
  ExprList *pList;           /* List of operands to the LIKE operator */
  int c;                     /* One character in z[] */
  int cnt;                   /* Number of non-wildcard prefix characters */
  char wc[3];                /* Wildcard characters */
  CollSeq *pColl;            /* Collating sequence for LHS */
  sqlite3 *db = pParse->db;  /* Database connection */
  sqlite3_value *pVal = 0;
  int op;                    /* Opcode of pRight */

  if( !sqlite3IsLikeFunction(db, pExpr, pnoCase, wc) ){
    return 0;
  }
................................................................................
  pLeft = pList->a[1].pExpr;
  if( pLeft->op!=TK_COLUMN || sqlite3ExprAffinity(pLeft)!=SQLITE_AFF_TEXT ){
    /* IMP: R-02065-49465 The left-hand side of the LIKE or GLOB operator must
    ** be the name of an indexed column with TEXT affinity. */
    return 0;
  }
  assert( pLeft->iColumn!=(-1) ); /* Because IPK never has AFF_TEXT */
  pColl = sqlite3ExprCollSeq(pParse, pLeft);
  if( pColl==0 ) return 0;  /* Happens when LHS has an undefined collation */
  if( (pColl->type!=SQLITE_COLL_BINARY || *pnoCase) &&
      (pColl->type!=SQLITE_COLL_NOCASE || !*pnoCase) ){
    /* IMP: R-09003-32046 For the GLOB operator, the column must use the
    ** default BINARY collating sequence.
    ** IMP: R-41408-28306 For the LIKE operator, if case_sensitive_like mode
    ** is enabled then the column must use the default BINARY collating
    ** sequence, or if case_sensitive_like mode is disabled then the column
    ** must use the built-in NOCASE collating sequence.
    */
    return 0;
  }

  pRight = pList->a[0].pExpr;
  op = pRight->op;
  if( op==TK_REGISTER ){
    op = pRight->op2;
  }
  if( op==TK_VARIABLE ){
................................................................................
  ){
    Expr *pLeft;       /* LHS of LIKE/GLOB operator */
    Expr *pStr2;       /* Copy of pStr1 - RHS of LIKE/GLOB operator */
    Expr *pNewExpr1;
    Expr *pNewExpr2;
    int idxNew1;
    int idxNew2;


    pLeft = pExpr->x.pList->a[1].pExpr;
    pStr2 = sqlite3ExprDup(db, pStr1, 0);
    if( !db->mallocFailed ){
      u8 c, *pC;       /* Last character before the first wildcard */
      pC = (u8*)&pStr2->u.zToken[sqlite3Strlen30(pStr2->u.zToken)-1];
      c = *pC;
................................................................................
        */
        if( c=='A'-1 ) isComplete = 0;

        c = sqlite3UpperToLower[c];
      }
      *pC = c + 1;
    }

    pNewExpr1 = sqlite3PExpr(pParse, TK_GE, sqlite3ExprDup(db,pLeft,0),pStr1,0);


    idxNew1 = whereClauseInsert(pWC, pNewExpr1, TERM_VIRTUAL|TERM_DYNAMIC);
    testcase( idxNew1==0 );
    exprAnalyze(pSrc, pWC, idxNew1);
    pNewExpr2 = sqlite3PExpr(pParse, TK_LT, sqlite3ExprDup(db,pLeft,0),pStr2,0);


    idxNew2 = whereClauseInsert(pWC, pNewExpr2, TERM_VIRTUAL|TERM_DYNAMIC);
    testcase( idxNew2==0 );
    exprAnalyze(pSrc, pWC, idxNew2);
    pTerm = &pWC->a[idxTerm];
    if( isComplete ){
      pWC->a[idxNew1].iParent = idxTerm;
      pWC->a[idxNew2].iParent = idxTerm;







<







 







<
<
<
<
<
<
<
<
<
<
<
<
<







 







>







 







>
|
>
>



|
>
>







631
632
633
634
635
636
637

638
639
640
641
642
643
644
...
649
650
651
652
653
654
655













656
657
658
659
660
661
662
....
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
....
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
){
  const char *z = 0;         /* String on RHS of LIKE operator */
  Expr *pRight, *pLeft;      /* Right and left size of LIKE operator */
  ExprList *pList;           /* List of operands to the LIKE operator */
  int c;                     /* One character in z[] */
  int cnt;                   /* Number of non-wildcard prefix characters */
  char wc[3];                /* Wildcard characters */

  sqlite3 *db = pParse->db;  /* Database connection */
  sqlite3_value *pVal = 0;
  int op;                    /* Opcode of pRight */

  if( !sqlite3IsLikeFunction(db, pExpr, pnoCase, wc) ){
    return 0;
  }
................................................................................
  pLeft = pList->a[1].pExpr;
  if( pLeft->op!=TK_COLUMN || sqlite3ExprAffinity(pLeft)!=SQLITE_AFF_TEXT ){
    /* IMP: R-02065-49465 The left-hand side of the LIKE or GLOB operator must
    ** be the name of an indexed column with TEXT affinity. */
    return 0;
  }
  assert( pLeft->iColumn!=(-1) ); /* Because IPK never has AFF_TEXT */














  pRight = pList->a[0].pExpr;
  op = pRight->op;
  if( op==TK_REGISTER ){
    op = pRight->op2;
  }
  if( op==TK_VARIABLE ){
................................................................................
  ){
    Expr *pLeft;       /* LHS of LIKE/GLOB operator */
    Expr *pStr2;       /* Copy of pStr1 - RHS of LIKE/GLOB operator */
    Expr *pNewExpr1;
    Expr *pNewExpr2;
    int idxNew1;
    int idxNew2;
    CollSeq *pColl;    /* Collating sequence to use */

    pLeft = pExpr->x.pList->a[1].pExpr;
    pStr2 = sqlite3ExprDup(db, pStr1, 0);
    if( !db->mallocFailed ){
      u8 c, *pC;       /* Last character before the first wildcard */
      pC = (u8*)&pStr2->u.zToken[sqlite3Strlen30(pStr2->u.zToken)-1];
      c = *pC;
................................................................................
        */
        if( c=='A'-1 ) isComplete = 0;

        c = sqlite3UpperToLower[c];
      }
      *pC = c + 1;
    }
    pColl = sqlite3FindCollSeq(db, SQLITE_UTF8, noCase ? "NOCASE" : "BINARY",0);
    pNewExpr1 = sqlite3PExpr(pParse, TK_GE, 
                     sqlite3ExprSetColl(sqlite3ExprDup(db,pLeft,0), pColl),
                     pStr1, 0);
    idxNew1 = whereClauseInsert(pWC, pNewExpr1, TERM_VIRTUAL|TERM_DYNAMIC);
    testcase( idxNew1==0 );
    exprAnalyze(pSrc, pWC, idxNew1);
    pNewExpr2 = sqlite3PExpr(pParse, TK_LT,
                     sqlite3ExprSetColl(sqlite3ExprDup(db,pLeft,0), pColl),
                     pStr2, 0);
    idxNew2 = whereClauseInsert(pWC, pNewExpr2, TERM_VIRTUAL|TERM_DYNAMIC);
    testcase( idxNew2==0 );
    exprAnalyze(pSrc, pWC, idxNew2);
    pTerm = &pWC->a[idxTerm];
    if( isComplete ){
      pWC->a[idxNew1].iParent = idxTerm;
      pWC->a[idxNew2].iParent = idxTerm;

Changes to test/like.test.

773
774
775
776
777
778
779
780


781























































































782
  }
} {12 123 scan 3 like 0}
do_test like-10.15 {
  count {
    SELECT a FROM t10b WHERE a GLOB '12*' ORDER BY a;
  }
} {12 123 scan 5 like 6}



























































































finish_test








>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
  }
} {12 123 scan 3 like 0}
do_test like-10.15 {
  count {
    SELECT a FROM t10b WHERE a GLOB '12*' ORDER BY a;
  }
} {12 123 scan 5 like 6}

# LIKE and GLOB where the default collating sequence is not appropriate
# but an index with the appropriate collating sequence exists.
#
do_test like-11.0 {
  execsql {
    CREATE TABLE t11(
      a INTEGER PRIMARY KEY,
      b TEXT COLLATE nocase,
      c TEXT COLLATE binary
    );
    INSERT INTO t11 VALUES(1, 'a','a');
    INSERT INTO t11 VALUES(2, 'ab','ab');
    INSERT INTO t11 VALUES(3, 'abc','abc');
    INSERT INTO t11 VALUES(4, 'abcd','abcd');
    INSERT INTO t11 VALUES(5, 'A','A');
    INSERT INTO t11 VALUES(6, 'AB','AB');
    INSERT INTO t11 VALUES(7, 'ABC','ABC');
    INSERT INTO t11 VALUES(8, 'ABCD','ABCD');
    INSERT INTO t11 VALUES(9, 'x','x');
    INSERT INTO t11 VALUES(10, 'yz','yz');
    INSERT INTO t11 VALUES(11, 'X','X');
    INSERT INTO t11 VALUES(12, 'YZ','YZ');
    SELECT count(*) FROM t11;
  }
} {12}
do_test like-11.1 {
  queryplan {
    PRAGMA case_sensitive_like=OFF;
    SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a;
  }
} {abc abcd ABC ABCD nosort t11 *}
do_test like-11.2 {
  queryplan {
    PRAGMA case_sensitive_like=ON;
    SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a;
  }
} {abc abcd nosort t11 *}
do_test like-11.3 {
  queryplan {
    PRAGMA case_sensitive_like=OFF;
    CREATE INDEX t11b ON t11(b);
    SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a;
  }
} {abc abcd ABC ABCD sort {} t11b}
do_test like-11.4 {
  queryplan {
    PRAGMA case_sensitive_like=ON;
    SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a;
  }
} {abc abcd nosort t11 *}
do_test like-11.5 {
  queryplan {
    PRAGMA case_sensitive_like=OFF;
    DROP INDEX t11b;
    CREATE INDEX t11bnc ON t11(b COLLATE nocase);
    SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a;
  }
} {abc abcd ABC ABCD sort {} t11bnc}
do_test like-11.6 {
  queryplan {
    CREATE INDEX t11bb ON t11(b COLLATE binary);
    SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a;
  }
} {abc abcd ABC ABCD sort {} t11bnc}
do_test like-11.7 {
  queryplan {
    PRAGMA case_sensitive_like=ON;
    SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a;
  }
} {abc abcd sort {} t11bb}
do_test like-11.8 {
  queryplan {
    PRAGMA case_sensitive_like=OFF;
    SELECT b FROM t11 WHERE b GLOB 'abc*' ORDER BY a;
  }
} {abc abcd sort {} t11bb}
do_test like-11.9 {
  queryplan {
    CREATE INDEX t11cnc ON t11(c COLLATE nocase);
    CREATE INDEX t11cb ON t11(c COLLATE binary);
    SELECT c FROM t11 WHERE c LIKE 'abc%' ORDER BY a;
  }
} {abc abcd ABC ABCD sort {} t11cnc}
do_test like-11.10 {
  queryplan {
    SELECT c FROM t11 WHERE c GLOB 'abc*' ORDER BY a;
  }
} {abc abcd sort {} t11cb}


finish_test