/ Check-in [30b81507]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Performance improvements for LIKE. It is still too slow though. (CVS 1535)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 30b81507fc404355751705c6f9856c178249eff1
User & Date: danielk1977 2004-06-06 12:41:50
Context
2004-06-07
01:52
Progress towards getting locking to work on windows. (CVS 1536) check-in: 4f7c0961 user: drh tags: trunk
2004-06-06
12:41
Performance improvements for LIKE. It is still too slow though. (CVS 1535) check-in: 30b81507 user: danielk1977 tags: trunk
09:44
Enhance user function API to support association of meta-data with constant arguments and the specification of text encoding preference. The LIKE operator takes advantage of both. (CVS 1534) check-in: 92337d8f user: danielk1977 tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/expr.c.

8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
....
1748
1749
1750
1751
1752
1753
1754

1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used for analyzing expressions and
** for generating VDBE code that evaluates expressions in SQLite.
**
** $Id: expr.c,v 1.135 2004/06/06 09:44:04 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>

char const *sqlite3AffinityString(char affinity){
  switch( affinity ){
    case SQLITE_AFF_INTEGER: return "i";
................................................................................
  }

  if( createFlag && matchqual<4 && 
      (pBest = sqliteMalloc(sizeof(*pBest)+nName+1)) ){
    pBest->nArg = nArg;
    pBest->pNext = pFirst;
    pBest->zName = (char*)&pBest[1];

    memcpy(pBest->zName, zName, nName);
    pBest->zName[nName] = 0;
    sqlite3HashInsert(&db->aFunc, pBest->zName, nName, (void*)pBest);
  }

  if( pBest && (pBest->xStep || pBest->xFunc || createFlag) ){
    return pBest;
  }
  return 0;
}








|







 







>











8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
....
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used for analyzing expressions and
** for generating VDBE code that evaluates expressions in SQLite.
**
** $Id: expr.c,v 1.136 2004/06/06 12:41:50 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>

char const *sqlite3AffinityString(char affinity){
  switch( affinity ){
    case SQLITE_AFF_INTEGER: return "i";
................................................................................
  }

  if( createFlag && matchqual<4 && 
      (pBest = sqliteMalloc(sizeof(*pBest)+nName+1)) ){
    pBest->nArg = nArg;
    pBest->pNext = pFirst;
    pBest->zName = (char*)&pBest[1];
    pBest->iPrefEnc = eTextRep;
    memcpy(pBest->zName, zName, nName);
    pBest->zName[nName] = 0;
    sqlite3HashInsert(&db->aFunc, pBest->zName, nName, (void*)pBest);
  }

  if( pBest && (pBest->xStep || pBest->xFunc || createFlag) ){
    return pBest;
  }
  return 0;
}

Changes to src/func.c.

12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
...
414
415
416
417
418
419
420

421



422
423
424
425
426
427
428
...
457
458
459
460
461
462
463
464
465
466
467
468
469
470


471
472
473
474
475
476
477
478
479
480
481

482
483
484
485
486
487
488
...
489
490
491
492
493
494
495


496
497
498
499
500



501






502
503
504
505
506
507
508
509

510
511
512


513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
...
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
** This file contains the C functions that implement various SQL
** functions of SQLite.  
**
** There is only one exported symbol in this file - the function
** sqliteRegisterBuildinFunctions() found at the bottom of the file.
** All other code has file scope.
**
** $Id: func.c,v 1.63 2004/06/06 09:44:04 danielk1977 Exp $
*/
#include <ctype.h>
#include <math.h>
#include <stdlib.h>
#include <assert.h>
#include "sqliteInt.h"
#include "vdbeInt.h"
................................................................................
    }else{              /* A regular character */
      aState[n].val = c;

      assert( pc_state<=n );
      if( pc_state<0 ){
        aState[n].failstate = -1;
      }else if( pc_state==n ){

        aState[n].failstate = pc_state;



      }else{
        int k = pLike->aState[n-1].failstate;
        while( k>pc_state && aState[k+1].val!=-1 && aState[k+1].val!=c ){
          k = aState[k].failstate;
        }
        if( k!=pc_state && aState[k+1].val==c ){
          assert( k==pc_state );
................................................................................
** not NULL, then this function uses UTF-16. Otherwise UTF-8.
*/
static void likeFunc(
  sqlite3_context *context, 
  int argc, 
  sqlite3_value **argv
){
  int s;
  int c;
  int nc;
  u8 enc;
  int offset = 0;
  const unsigned char *zString;
  LikePattern *pLike = sqlite3_get_auxdata(context, 0); 



  /* If either argument is NULL, the result is NULL */
  if( sqlite3_value_type(argv[1])==SQLITE_NULL || 
      sqlite3_value_type(argv[0])==SQLITE_NULL ){
    return;
  }

  /* If the user-data pointer is NULL, use UTF-8. Otherwise UTF-16. */
  if( sqlite3_user_data(context) ){
    enc = TEXT_Utf16;
    zString = (const unsigned char *)sqlite3_value_text16(argv[1]);

  }else{
    enc = TEXT_Utf8;
    zString = sqlite3_value_text(argv[1]);
  }

  /* If the LIKE pattern has not been compiled, compile it now. */
  if( !pLike ){
................................................................................
    pLike = compileLike(argv[0], enc);
    if( !pLike ){
      sqlite3_result_error(context, "out of memory", -1);
      return;
    }
    sqlite3_set_auxdata(context, 0, pLike, deleteLike);
  }



  s = 0;
  nc = 1;
  do {
    int val = pLike->aState[s].val;



    if( nc ) c = sqlite3ReadUniChar(zString, &offset, &enc, 1);







#if defined(TRACE_LIKE) && !defined(NDEBUG)
    printf("State=%d:(%d, %d) Input=%d\n", 
        s, pLike->aState[s].val, 
        pLike->aState[s].failstate, c);
#endif

    if( val==-1 || val==c ){

      s++;
      nc = 1;
    }else{


      if( pLike->aState[s].failstate==s ){
        nc = 1;
      }else{
        nc = 0;
        s = pLike->aState[s].failstate;
      }
    }
  }while( c && s>=0 );

  if( s==pLike->nState ){
    sqlite3_result_int(context, 1);
  }else{
    sqlite3_result_int(context, 0);
  }
}

/*
................................................................................

  for(i=0; i<sizeof(aFuncs)/sizeof(aFuncs[0]); i++){
    void *pArg = 0;
    switch( aFuncs[i].argType ){
      case 1: pArg = db; break;
      case 2: pArg = (void *)(-1); break;
    }
    sqlite3_create_function(db, aFuncs[i].zName, aFuncs[i].nArg, 0, 0,
        pArg, aFuncs[i].xFunc, 0, 0);
  }
  for(i=0; i<sizeof(aAggs)/sizeof(aAggs[0]); i++){
    void *pArg = 0;
    switch( aAggs[i].argType ){
      case 1: pArg = db; break;
      case 2: pArg = (void *)(-1); break;
    }
    sqlite3_create_function(db, aAggs[i].zName, aAggs[i].nArg, 0, 0, pArg,
        0, aAggs[i].xStep, aAggs[i].xFinalize);
  }
  sqlite3RegisterDateTimeFunctions(db);
}







|







 







>
|
>
>
>







 







<
|
<




>
>











>







 







>
>

<
<

|
>
>
>
|
>
>
>
>
>
>



|
<


<
>
|
<

>
>
|
|
<
<
<


|

|







 







|
|












12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
...
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
...
461
462
463
464
465
466
467

468

469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
...
494
495
496
497
498
499
500
501
502
503


504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519

520
521

522
523

524
525
526
527
528



529
530
531
532
533
534
535
536
537
538
539
540
...
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
** This file contains the C functions that implement various SQL
** functions of SQLite.  
**
** There is only one exported symbol in this file - the function
** sqliteRegisterBuildinFunctions() found at the bottom of the file.
** All other code has file scope.
**
** $Id: func.c,v 1.64 2004/06/06 12:41:50 danielk1977 Exp $
*/
#include <ctype.h>
#include <math.h>
#include <stdlib.h>
#include <assert.h>
#include "sqliteInt.h"
#include "vdbeInt.h"
................................................................................
    }else{              /* A regular character */
      aState[n].val = c;

      assert( pc_state<=n );
      if( pc_state<0 ){
        aState[n].failstate = -1;
      }else if( pc_state==n ){
        if( c ){
          aState[n].failstate = pc_state;
        }else{
          aState[n].failstate = -2;
        }
      }else{
        int k = pLike->aState[n-1].failstate;
        while( k>pc_state && aState[k+1].val!=-1 && aState[k+1].val!=c ){
          k = aState[k].failstate;
        }
        if( k!=pc_state && aState[k+1].val==c ){
          assert( k==pc_state );
................................................................................
** not NULL, then this function uses UTF-16. Otherwise UTF-8.
*/
static void likeFunc(
  sqlite3_context *context, 
  int argc, 
  sqlite3_value **argv
){

  register int c;

  u8 enc;
  int offset = 0;
  const unsigned char *zString;
  LikePattern *pLike = sqlite3_get_auxdata(context, 0); 
  struct LikeState *aState;
  register struct LikeState *pState;

  /* If either argument is NULL, the result is NULL */
  if( sqlite3_value_type(argv[1])==SQLITE_NULL || 
      sqlite3_value_type(argv[0])==SQLITE_NULL ){
    return;
  }

  /* If the user-data pointer is NULL, use UTF-8. Otherwise UTF-16. */
  if( sqlite3_user_data(context) ){
    enc = TEXT_Utf16;
    zString = (const unsigned char *)sqlite3_value_text16(argv[1]);
    assert(0);
  }else{
    enc = TEXT_Utf8;
    zString = sqlite3_value_text(argv[1]);
  }

  /* If the LIKE pattern has not been compiled, compile it now. */
  if( !pLike ){
................................................................................
    pLike = compileLike(argv[0], enc);
    if( !pLike ){
      sqlite3_result_error(context, "out of memory", -1);
      return;
    }
    sqlite3_set_auxdata(context, 0, pLike, deleteLike);
  }
  aState = pLike->aState;
  pState = aState;



  do {
    if( enc==TEXT_Utf8 ){
      c = zString[offset++];
      if( c&0x80 ){
        offset--;
        c = sqlite3ReadUniChar(zString, &offset, &enc, 1);
      }
    }else{
      c = sqlite3ReadUniChar(zString, &offset, &enc, 1);
    }

skip_read:

#if defined(TRACE_LIKE) && !defined(NDEBUG)
    printf("State=%d:(%d, %d) Input=%d\n", 
        (aState - pState), pState->val, pState->failstate, c);

#endif


    if( pState->val==-1 || pState->val==c ){
      pState++;

    }else{
      struct LikeState *pFailState = &aState[pState->failstate];
      if( pState!=pFailState ){
        pState = pFailState;
        if( c && pState>=aState ) goto skip_read;



      }
    }
  }while( c && pState>=aState );

  if( (pState-aState)==pLike->nState || (pState-aState)<-1 ){
    sqlite3_result_int(context, 1);
  }else{
    sqlite3_result_int(context, 0);
  }
}

/*
................................................................................

  for(i=0; i<sizeof(aFuncs)/sizeof(aFuncs[0]); i++){
    void *pArg = 0;
    switch( aFuncs[i].argType ){
      case 1: pArg = db; break;
      case 2: pArg = (void *)(-1); break;
    }
    sqlite3_create_function(db, aFuncs[i].zName, aFuncs[i].nArg,
        aFuncs[i].eTextRep, 0, pArg, aFuncs[i].xFunc, 0, 0);
  }
  for(i=0; i<sizeof(aAggs)/sizeof(aAggs[0]); i++){
    void *pArg = 0;
    switch( aAggs[i].argType ){
      case 1: pArg = db; break;
      case 2: pArg = (void *)(-1); break;
    }
    sqlite3_create_function(db, aAggs[i].zName, aAggs[i].nArg, 0, 0, pArg,
        0, aAggs[i].xStep, aAggs[i].xFinalize);
  }
  sqlite3RegisterDateTimeFunctions(db);
}

Changes to src/utf.c.

8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
...
113
114
115
116
117
118
119




























120
121
122
123
124
125
126
...
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.17 2004/06/06 09:44:05 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
................................................................................
** LOWERCASE(x) for details.
*/
int sqlite3ReadUniChar(const char *zStr, int *pOffset, u8 *pEnc, int fold){
  int ret = 0;

  switch( *pEnc ){
    case TEXT_Utf8: {




























      struct Utf8TblRow {
        u8 b1_mask;
        u8 b1_masked_val;
        u8 b1_value_mask;
        int trailing_bytes;
      };
      static const struct Utf8TblRow utf8tbl[] = {
................................................................................
      for( ii=0; ii<pRow->trailing_bytes; ii++ ){
        u8 b = zStr[(*pOffset)++];
        if( (b&0xC0)!=0x80 ){
          return (int)0xFFFD;
        }
        ret = (ret<<6) + (u32)(b&0x3F);
      }
      
      break;
    }

    case TEXT_Utf16le:
    case TEXT_Utf16be: {
      u32 code_point;   /* the first code-point in the character */
      u32 code_point2;  /* the second code-point in the character, if any */







|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







<







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
...
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
...
177
178
179
180
181
182
183

184
185
186
187
188
189
190
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.18 2004/06/06 12:41:50 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
................................................................................
** LOWERCASE(x) for details.
*/
int sqlite3ReadUniChar(const char *zStr, int *pOffset, u8 *pEnc, int fold){
  int ret = 0;

  switch( *pEnc ){
    case TEXT_Utf8: {

#if 0
  static const int initVal[] = {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
     30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
     45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
     60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
     75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
     90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
    105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
    120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
    135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
    150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
    165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
    180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,   0,   1,   2,
      3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,
     18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,   0,
      1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
      0,   1,   2,   3,   4,   5,   6,   7,   0,   1,   2,   3,   0,   1, 254,
    255,
  };
  ret = initVal[(unsigned char)zStr[(*pOffset)++]];
  while( (0xc0&zStr[*pOffset])==0x80 ){
    ret = (ret<<6) | (0x3f&(zStr[(*pOffset)++]));
  }
#endif

      struct Utf8TblRow {
        u8 b1_mask;
        u8 b1_masked_val;
        u8 b1_value_mask;
        int trailing_bytes;
      };
      static const struct Utf8TblRow utf8tbl[] = {
................................................................................
      for( ii=0; ii<pRow->trailing_bytes; ii++ ){
        u8 b = zStr[(*pOffset)++];
        if( (b&0xC0)!=0x80 ){
          return (int)0xFFFD;
        }
        ret = (ret<<6) + (u32)(b&0x3F);
      }

      break;
    }

    case TEXT_Utf16le:
    case TEXT_Utf16be: {
      u32 code_point;   /* the first code-point in the character */
      u32 code_point2;  /* the second code-point in the character, if any */

Changes to src/util.c.

10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
...
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
**
*************************************************************************
** Utility functions used throughout sqlite.
**
** This file contains functions for allocating memory, comparing
** strings, and stuff like that.
**
** $Id: util.c,v 1.97 2004/06/06 09:44:05 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <stdarg.h>
#include <ctype.h>

/*
** If malloc() ever fails, this global variable gets set to 1.
................................................................................

#if 1  /* We are now always UTF-8 */
/*
** Convert the UTF-8 character to which z points into a 31-bit
** UCS character.  This only works right if z points to a well-formed
** UTF-8 string.
*/
static int sqlite3ReadUtf8(const unsigned char *z){
  int c;
  static const int initVal[] = {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
     30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
     45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
     60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,







|







 







|







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
...
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
**
*************************************************************************
** Utility functions used throughout sqlite.
**
** This file contains functions for allocating memory, comparing
** strings, and stuff like that.
**
** $Id: util.c,v 1.98 2004/06/06 12:41:50 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <stdarg.h>
#include <ctype.h>

/*
** If malloc() ever fails, this global variable gets set to 1.
................................................................................

#if 1  /* We are now always UTF-8 */
/*
** Convert the UTF-8 character to which z points into a 31-bit
** UCS character.  This only works right if z points to a well-formed
** UTF-8 string.
*/
int sqlite3ReadUtf8(const unsigned char *z){
  int c;
  static const int initVal[] = {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
     30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
     45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
     60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,