SQLite

Check-in [30b81507fc]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Performance improvements for LIKE. It is still too slow though. (CVS 1535)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 30b81507fc404355751705c6f9856c178249eff1
User & Date: danielk1977 2004-06-06 12:41:50.000
Context
2004-06-07
01:52
Progress towards getting locking to work on windows. (CVS 1536) (check-in: 4f7c0961ad user: drh tags: trunk)
2004-06-06
12:41
Performance improvements for LIKE. It is still too slow though. (CVS 1535) (check-in: 30b81507fc user: danielk1977 tags: trunk)
09:44
Enhance user function API to support association of meta-data with constant arguments and the specification of text encoding preference. The LIKE operator takes advantage of both. (CVS 1534) (check-in: 92337d8f79 user: danielk1977 tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/expr.c.
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used for analyzing expressions and
** for generating VDBE code that evaluates expressions in SQLite.
**
** $Id: expr.c,v 1.135 2004/06/06 09:44:04 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>

char const *sqlite3AffinityString(char affinity){
  switch( affinity ){
    case SQLITE_AFF_INTEGER: return "i";







|







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used for analyzing expressions and
** for generating VDBE code that evaluates expressions in SQLite.
**
** $Id: expr.c,v 1.136 2004/06/06 12:41:50 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>

char const *sqlite3AffinityString(char affinity){
  switch( affinity ){
    case SQLITE_AFF_INTEGER: return "i";
1748
1749
1750
1751
1752
1753
1754

1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
  }

  if( createFlag && matchqual<4 && 
      (pBest = sqliteMalloc(sizeof(*pBest)+nName+1)) ){
    pBest->nArg = nArg;
    pBest->pNext = pFirst;
    pBest->zName = (char*)&pBest[1];

    memcpy(pBest->zName, zName, nName);
    pBest->zName[nName] = 0;
    sqlite3HashInsert(&db->aFunc, pBest->zName, nName, (void*)pBest);
  }

  if( pBest && (pBest->xStep || pBest->xFunc || createFlag) ){
    return pBest;
  }
  return 0;
}








>











1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
  }

  if( createFlag && matchqual<4 && 
      (pBest = sqliteMalloc(sizeof(*pBest)+nName+1)) ){
    pBest->nArg = nArg;
    pBest->pNext = pFirst;
    pBest->zName = (char*)&pBest[1];
    pBest->iPrefEnc = eTextRep;
    memcpy(pBest->zName, zName, nName);
    pBest->zName[nName] = 0;
    sqlite3HashInsert(&db->aFunc, pBest->zName, nName, (void*)pBest);
  }

  if( pBest && (pBest->xStep || pBest->xFunc || createFlag) ){
    return pBest;
  }
  return 0;
}

Changes to src/func.c.
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
** This file contains the C functions that implement various SQL
** functions of SQLite.  
**
** There is only one exported symbol in this file - the function
** sqliteRegisterBuiltinFunctions() found at the bottom of the file.
** All other code has file scope.
**
** $Id: func.c,v 1.63 2004/06/06 09:44:04 danielk1977 Exp $
*/
#include <ctype.h>
#include <math.h>
#include <stdlib.h>
#include <assert.h>
#include "sqliteInt.h"
#include "vdbeInt.h"







|







12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
** This file contains the C functions that implement various SQL
** functions of SQLite.  
**
** There is only one exported symbol in this file - the function
** sqliteRegisterBuiltinFunctions() found at the bottom of the file.
** All other code has file scope.
**
** $Id: func.c,v 1.64 2004/06/06 12:41:50 danielk1977 Exp $
*/
#include <ctype.h>
#include <math.h>
#include <stdlib.h>
#include <assert.h>
#include "sqliteInt.h"
#include "vdbeInt.h"
414
415
416
417
418
419
420

421



422
423
424
425
426
427
428
    }else{              /* A regular character */
      aState[n].val = c;

      assert( pc_state<=n );
      if( pc_state<0 ){
        aState[n].failstate = -1;
      }else if( pc_state==n ){

        aState[n].failstate = pc_state;



      }else{
        int k = pLike->aState[n-1].failstate;
        while( k>pc_state && aState[k+1].val!=-1 && aState[k+1].val!=c ){
          k = aState[k].failstate;
        }
        if( k!=pc_state && aState[k+1].val==c ){
          assert( k==pc_state );







>
|
>
>
>







414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
    }else{              /* A regular character */
      aState[n].val = c;

      assert( pc_state<=n );
      if( pc_state<0 ){
        aState[n].failstate = -1;
      }else if( pc_state==n ){
        if( c ){
          aState[n].failstate = pc_state;
        }else{
          aState[n].failstate = -2;
        }
      }else{
        int k = pLike->aState[n-1].failstate;
        while( k>pc_state && aState[k+1].val!=-1 && aState[k+1].val!=c ){
          k = aState[k].failstate;
        }
        if( k!=pc_state && aState[k+1].val==c ){
          assert( k==pc_state );
457
458
459
460
461
462
463
464
465
466
467
468
469
470


471
472
473
474
475
476
477
478
479
480
481

482
483
484
485
486
487
488
489
490
491
492
493
494
495


496
497
498
499

500


501
502






503
504
505
506
507
508
509
510
511
512
513

514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
** not NULL, then this function uses UTF-16. Otherwise UTF-8.
*/
/*
** SQL function implementation that matches argv[1] (the string) against
** argv[0] (the pattern, handed to compileLike()).  The compiled pattern is
** cached as auxiliary data in slot 0 of the function context.
**
** Sets an integer result of 1 when the state machine ends in state
** pLike->nState, 0 otherwise.  No result is set when either argument is
** NULL.  argc is not referenced in the body.
*/
static void likeFunc(
  sqlite3_context *context, 
  int argc, 
  sqlite3_value **argv
){
  int s;                        /* Index of the current state in pLike->aState */
  int c;                        /* Most recently read input character */
  int nc;                       /* Non-zero: read a new character this iteration */
  u8 enc;                       /* Text encoding passed to the reader/compiler */
  int offset = 0;               /* Read position in zString, advanced by the reader */
  const unsigned char *zString; /* The string being matched (argv[1]) */
  LikePattern *pLike = sqlite3_get_auxdata(context, 0); 



  /* If either argument is NULL, the result is NULL */
  if( sqlite3_value_type(argv[1])==SQLITE_NULL || 
      sqlite3_value_type(argv[0])==SQLITE_NULL ){
    return;
  }

  /* If the user-data pointer is NULL, use UTF-8. Otherwise UTF-16. */
  if( sqlite3_user_data(context) ){
    enc = TEXT_Utf16;
    zString = (const unsigned char *)sqlite3_value_text16(argv[1]);

  }else{
    enc = TEXT_Utf8;
    zString = sqlite3_value_text(argv[1]);
  }

  /* If the LIKE pattern has not been compiled, compile it now. */
  if( !pLike ){
    pLike = compileLike(argv[0], enc);
    if( !pLike ){
      sqlite3_result_error(context, "out of memory", -1);
      return;
    }
    /* Cache the compiled pattern; deleteLike is registered as its destructor. */
    sqlite3_set_auxdata(context, 0, pLike, deleteLike);
  }



  /* Run the state machine: start in state 0 and read the first character. */
  s = 0;
  nc = 1;
  do {

    int val = pLike->aState[s].val;


    /* The last argument is the "fold" flag of sqlite3ReadUniChar(). */
    if( nc ) c = sqlite3ReadUniChar(zString, &offset, &enc, 1);







#if defined(TRACE_LIKE) && !defined(NDEBUG)
    printf("State=%d:(%d, %d) Input=%d\n", 
        s, pLike->aState[s].val, 
        pLike->aState[s].failstate, c);
#endif

    /* A state value of -1 accepts any input character. */
    if( val==-1 || val==c ){
      s++;
      nc = 1;
    }else{
      if( pLike->aState[s].failstate==s ){

        /* The fail state is this same state: stay put, consume the next
        ** input character. */
        nc = 1;
      }else{
        /* Move to the fail state and re-test the same input character. */
        nc = 0;
        s = pLike->aState[s].failstate;
      }
    }
  }while( c && s>=0 );  /* Stop on a zero character or a negative state */

  /* The match succeeded only if the machine ran past the last state. */
  if( s==pLike->nState ){
    sqlite3_result_int(context, 1);
  }else{
    sqlite3_result_int(context, 0);
  }
}

/*







<
|
<




>
>











>














>
>

<
<

>
|
>
>
|
|
>
>
>
>
>
>


<
|


|
|
<

|
>
|
<
<
|


|

|







461
462
463
464
465
466
467

468

469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503


504
505
506
507
508
509
510
511
512
513
514
515
516
517
518

519
520
521
522
523

524
525
526
527


528
529
530
531
532
533
534
535
536
537
538
539
540
** not NULL, then this function uses UTF-16. Otherwise UTF-8.
*/
/*
** SQL function implementation that matches argv[1] (the string) against
** argv[0] (the pattern, handed to compileLike()).  The compiled pattern is
** cached as auxiliary data in slot 0 of the function context.
**
** Sets an integer result of 1 on a match and 0 otherwise.  No result is
** set when either argument is NULL.  argc is not referenced in the body.
*/
static void likeFunc(
  sqlite3_context *context, 
  int argc, 
  sqlite3_value **argv
){
  register int c;                   /* Most recently read input character */
  u8 enc;                           /* Text encoding of zString */
  int offset = 0;                   /* Read position within zString */
  const unsigned char *zString;     /* The string being matched (argv[1]) */
  LikePattern *pLike = sqlite3_get_auxdata(context, 0); 
  struct LikeState *aState;         /* Base of the compiled state array */
  register struct LikeState *pState;  /* Current state */

  /* If either argument is NULL, the result is NULL */
  if( sqlite3_value_type(argv[1])==SQLITE_NULL || 
      sqlite3_value_type(argv[0])==SQLITE_NULL ){
    return;
  }

  /* If the user-data pointer is NULL, use UTF-8. Otherwise UTF-16. */
  if( sqlite3_user_data(context) ){
    enc = TEXT_Utf16;
    zString = (const unsigned char *)sqlite3_value_text16(argv[1]);
    /* NOTE(review): this aborts the UTF-16 path whenever asserts are
    ** enabled; presumably UTF-16 is not expected to be used here yet. */
    assert(0);
  }else{
    enc = TEXT_Utf8;
    zString = sqlite3_value_text(argv[1]);
  }

  /* If the LIKE pattern has not been compiled, compile it now. */
  if( !pLike ){
    pLike = compileLike(argv[0], enc);
    if( !pLike ){
      sqlite3_result_error(context, "out of memory", -1);
      return;
    }
    /* Cache the compiled pattern; deleteLike is registered as its destructor. */
    sqlite3_set_auxdata(context, 0, pLike, deleteLike);
  }
  aState = pLike->aState;
  pState = aState;

  do {
    if( enc==TEXT_Utf8 ){
      /* Fast path: take a single byte directly; fall back to the general
      ** reader only when the high bit is set.
      ** NOTE(review): single-byte characters bypass sqlite3ReadUniChar()
      ** and therefore its "fold" argument - confirm this is intended. */
      c = zString[offset++];
      if( c&0x80 ){
        offset--;
        c = sqlite3ReadUniChar(zString, &offset, &enc, 1);
      }
    }else{
      c = sqlite3ReadUniChar(zString, &offset, &enc, 1);
    }

skip_read:
    /* Re-entered via goto after a fail-state transition so that the same
    ** input character is tested against the new state. */

#if defined(TRACE_LIKE) && !defined(NDEBUG)
    /* Print the index of the current state.  The difference is taken as
    ** (pState - aState) and narrowed to int to match the %d conversion. */
    printf("State=%d:(%d, %d) Input=%d\n", 
        (int)(pState - aState), pState->val, pState->failstate, c);
#endif

    /* A state value of -1 accepts any input character. */
    if( pState->val==-1 || pState->val==c ){
      pState++;
    }else{
      struct LikeState *pFailState = &aState[pState->failstate];
      /* If the fail state is the current state, stay put and consume the
      ** next character.  Otherwise switch states and, while input remains
      ** and the state is valid, retry the same character. */
      if( pState!=pFailState ){
        pState = pFailState;
        if( c && pState>=aState ) goto skip_read;
      }
    }
  }while( c && pState>=aState );

  /* A match is either running past the last state, or landing on a state
  ** index below -1 (a fail-state value such as -2). */
  if( (pState-aState)==pLike->nState || (pState-aState)<-1 ){
    sqlite3_result_int(context, 1);
  }else{
    sqlite3_result_int(context, 0);
  }
}

/*
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932

  for(i=0; i<sizeof(aFuncs)/sizeof(aFuncs[0]); i++){
    void *pArg = 0;
    switch( aFuncs[i].argType ){
      case 1: pArg = db; break;
      case 2: pArg = (void *)(-1); break;
    }
    sqlite3_create_function(db, aFuncs[i].zName, aFuncs[i].nArg, 0, 0,
        pArg, aFuncs[i].xFunc, 0, 0);
  }
  for(i=0; i<sizeof(aAggs)/sizeof(aAggs[0]); i++){
    void *pArg = 0;
    switch( aAggs[i].argType ){
      case 1: pArg = db; break;
      case 2: pArg = (void *)(-1); break;
    }
    sqlite3_create_function(db, aAggs[i].zName, aAggs[i].nArg, 0, 0, pArg,
        0, aAggs[i].xStep, aAggs[i].xFinalize);
  }
  sqlite3RegisterDateTimeFunctions(db);
}







|
|












923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943

  for(i=0; i<sizeof(aFuncs)/sizeof(aFuncs[0]); i++){
    void *pArg = 0;
    switch( aFuncs[i].argType ){
      case 1: pArg = db; break;
      case 2: pArg = (void *)(-1); break;
    }
    sqlite3_create_function(db, aFuncs[i].zName, aFuncs[i].nArg,
        aFuncs[i].eTextRep, 0, pArg, aFuncs[i].xFunc, 0, 0);
  }
  for(i=0; i<sizeof(aAggs)/sizeof(aAggs[0]); i++){
    void *pArg = 0;
    switch( aAggs[i].argType ){
      case 1: pArg = db; break;
      case 2: pArg = (void *)(-1); break;
    }
    sqlite3_create_function(db, aAggs[i].zName, aAggs[i].nArg, 0, 0, pArg,
        0, aAggs[i].xStep, aAggs[i].xFinalize);
  }
  sqlite3RegisterDateTimeFunctions(db);
}
Changes to src/utf.c.
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.17 2004/06/06 09:44:05 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx







|







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.18 2004/06/06 12:41:50 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
113
114
115
116
117
118
119




























120
121
122
123
124
125
126
** LOWERCASE(x) for details.
*/
int sqlite3ReadUniChar(const char *zStr, int *pOffset, u8 *pEnc, int fold){
  int ret = 0;

  switch( *pEnc ){
    case TEXT_Utf8: {




























      struct Utf8TblRow {
        u8 b1_mask;
        u8 b1_masked_val;
        u8 b1_value_mask;
        int trailing_bytes;
      };
      static const struct Utf8TblRow utf8tbl[] = {







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
** LOWERCASE(x) for details.
*/
int sqlite3ReadUniChar(const char *zStr, int *pOffset, u8 *pEnc, int fold){
  int ret = 0;

  switch( *pEnc ){
    case TEXT_Utf8: {

#if 0
  static const int initVal[] = {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
     30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
     45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
     60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
     75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
     90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
    105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
    120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
    135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
    150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
    165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
    180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,   0,   1,   2,
      3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,
     18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,   0,
      1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
      0,   1,   2,   3,   4,   5,   6,   7,   0,   1,   2,   3,   0,   1, 254,
    255,
  };
  ret = initVal[(unsigned char)zStr[(*pOffset)++]];
  while( (0xc0&zStr[*pOffset])==0x80 ){
    ret = (ret<<6) | (0x3f&(zStr[(*pOffset)++]));
  }
#endif

      struct Utf8TblRow {
        u8 b1_mask;
        u8 b1_masked_val;
        u8 b1_value_mask;
        int trailing_bytes;
      };
      static const struct Utf8TblRow utf8tbl[] = {
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
      for( ii=0; ii<pRow->trailing_bytes; ii++ ){
        u8 b = zStr[(*pOffset)++];
        if( (b&0xC0)!=0x80 ){
          return (int)0xFFFD;
        }
        ret = (ret<<6) + (u32)(b&0x3F);
      }
      
      break;
    }

    case TEXT_Utf16le:
    case TEXT_Utf16be: {
      u32 code_point;   /* the first code-point in the character */
      u32 code_point2;  /* the second code-point in the character, if any */







<







177
178
179
180
181
182
183

184
185
186
187
188
189
190
      for( ii=0; ii<pRow->trailing_bytes; ii++ ){
        u8 b = zStr[(*pOffset)++];
        if( (b&0xC0)!=0x80 ){
          return (int)0xFFFD;
        }
        ret = (ret<<6) + (u32)(b&0x3F);
      }

      break;
    }

    case TEXT_Utf16le:
    case TEXT_Utf16be: {
      u32 code_point;   /* the first code-point in the character */
      u32 code_point2;  /* the second code-point in the character, if any */
Changes to src/util.c.
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
**
*************************************************************************
** Utility functions used throughout sqlite.
**
** This file contains functions for allocating memory, comparing
** strings, and stuff like that.
**
** $Id: util.c,v 1.97 2004/06/06 09:44:05 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <stdarg.h>
#include <ctype.h>

/*
** If malloc() ever fails, this global variable gets set to 1.







|







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
**
*************************************************************************
** Utility functions used throughout sqlite.
**
** This file contains functions for allocating memory, comparing
** strings, and stuff like that.
**
** $Id: util.c,v 1.98 2004/06/06 12:41:50 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <stdarg.h>
#include <ctype.h>

/*
** If malloc() ever fails, this global variable gets set to 1.
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927

#if 1  /* We are now always UTF-8 */
/*
** Convert the UTF-8 character to which z points into a 31-bit
** UCS character.  This only works right if z points to a well-formed
** UTF-8 string.
*/
static int sqlite3ReadUtf8(const unsigned char *z){
  int c;
  static const int initVal[] = {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
     30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
     45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
     60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,







|







913
914
915
916
917
918
919
920
921
922
923
924
925
926
927

#if 1  /* We are now always UTF-8 */
/*
** Convert the UTF-8 character to which z points into a 31-bit
** UCS character.  This only works right if z points to a well-formed
** UTF-8 string.
*/
int sqlite3ReadUtf8(const unsigned char *z){
  int c;
  static const int initVal[] = {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
     30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
     45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
     60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,