SQLite

Check-in [47b73f6bfe]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Merge enhancements from trunk, especially the enhanced sqlite3_normalized_sql() interface.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | begin-concurrent
Files: files | file ages | folders
SHA3-256: 47b73f6bfee8c5e41c408fb70dff0e4596c0a3eb0aeba40ad232a6cb1fe75532
User & Date: drh 2018-12-05 13:44:19.230
Context
2018-12-06
00:05
Merge ALTER TABLE and sqlite3_normalized_sql() fixes from trunk. (check-in: acf10b3f8d user: drh tags: begin-concurrent)
2018-12-05
13:49
Merge enhancements from trunk, especially the sqlite3_normalized_sql() enhancement. (check-in: 342c9538d9 user: drh tags: begin-concurrent-pnu)
13:44
Merge enhancements from trunk, especially the enhanced sqlite3_normalized_sql() interface. (check-in: 47b73f6bfe user: drh tags: begin-concurrent)
13:39
Enhance the sqlite3_normalize_sql() interface so that it works even if the prepared statement was not initially compiled using SQLITE_PREPARE_NORMALIZED. Enhance the ".trace" command in the CLI so that it is able to access the full scope of functionality provided by sqlite3_trace_v2() and in particular so that it is able to show normalized SQL output using the newly enhanced sqlite3_normalize_sql() interface. (check-in: 7da617e97e user: drh tags: trunk)
2018-12-04
13:51
Fix a problem with SQLITE_ENABLE_EXPENSIVE_ASSERT builds on this branch. (check-in: ddb4a6fbf8 user: dan tags: begin-concurrent)
Changes
Unified Diff Ignore Whitespace Patch
Changes to Makefile.in.
1302
1303
1304
1305
1306
1307
1308



1309
1310
1311
1312
1313
1314
1315

showwal$(TEXE):	$(TOP)/tool/showwal.c sqlite3.lo
	$(LTLINK) -o $@ $(TOP)/tool/showwal.c sqlite3.lo $(TLIBS)

showshm$(TEXE):	$(TOP)/tool/showshm.c
	$(LTLINK) -o $@ $(TOP)/tool/showshm.c




changeset$(TEXE):	$(TOP)/ext/session/changeset.c sqlite3.lo
	$(LTLINK) -o $@ $(TOP)/ext/session/changeset.c sqlite3.lo $(TLIBS)

changesetfuzz$(TEXE):	$(TOP)/ext/session/changesetfuzz.c sqlite3.lo
	$(LTLINK) -o $@ $(TOP)/ext/session/changesetfuzz.c sqlite3.lo $(TLIBS)

rollback-test$(TEXE):	$(TOP)/tool/rollback-test.c sqlite3.lo







>
>
>







1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318

showwal$(TEXE):	$(TOP)/tool/showwal.c sqlite3.lo
	$(LTLINK) -o $@ $(TOP)/tool/showwal.c sqlite3.lo $(TLIBS)

showshm$(TEXE):	$(TOP)/tool/showshm.c
	$(LTLINK) -o $@ $(TOP)/tool/showshm.c

index_usage$(TEXE): $(TOP)/tool/index_usage.c sqlite3.lo
	$(LTLINK) -o $@ $(TOP)/tool/index_usage.c sqlite3.lo $(TLIBS)

changeset$(TEXE):	$(TOP)/ext/session/changeset.c sqlite3.lo
	$(LTLINK) -o $@ $(TOP)/ext/session/changeset.c sqlite3.lo $(TLIBS)

changesetfuzz$(TEXE):	$(TOP)/ext/session/changesetfuzz.c sqlite3.lo
	$(LTLINK) -o $@ $(TOP)/ext/session/changesetfuzz.c sqlite3.lo $(TLIBS)

rollback-test$(TEXE):	$(TOP)/tool/rollback-test.c sqlite3.lo
Changes to Makefile.msc.
2440
2441
2442
2443
2444
2445
2446




2447
2448
2449
2450
2451
2452
2453
showwal.exe:	$(TOP)\tool\showwal.c $(SQLITE3C) $(SQLITE3H)
	$(LTLINK) $(NO_WARN) -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_LOAD_EXTENSION \
		$(TOP)\tool\showwal.c $(SQLITE3C) /link $(LDFLAGS) $(LTLINKOPTS)

showshm.exe:	$(TOP)\tool\showshm.c
	$(LTLINK) $(NO_WARN)	$(TOP)\tool\showshm.c /link $(LDFLAGS) $(LTLINKOPTS)





changeset.exe:	$(TOP)\ext\session\changeset.c $(SQLITE3C) $(SQLITE3H)
	$(LTLINK) $(NO_WARN) -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_LOAD_EXTENSION \
		-DSQLITE_ENABLE_SESSION=1 -DSQLITE_ENABLE_PREUPDATE_HOOK=1 \
		$(TOP)\ext\session\changeset.c $(SQLITE3C) /link $(LDFLAGS) $(LTLINKOPTS)

changesetfuzz.exe:	$(TOP)\ext\session\changesetfuzz.c $(SQLITE3C) $(SQLITE3H)
	$(LTLINK) $(NO_WARN) -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_LOAD_EXTENSION \







>
>
>
>







2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
showwal.exe:	$(TOP)\tool\showwal.c $(SQLITE3C) $(SQLITE3H)
	$(LTLINK) $(NO_WARN) -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_LOAD_EXTENSION \
		$(TOP)\tool\showwal.c $(SQLITE3C) /link $(LDFLAGS) $(LTLINKOPTS)

showshm.exe:	$(TOP)\tool\showshm.c
	$(LTLINK) $(NO_WARN)	$(TOP)\tool\showshm.c /link $(LDFLAGS) $(LTLINKOPTS)

index_usage.exe: $(TOP)\tool\index_usage.c $(SQLITE3C) $(SQLITE3H)
	$(LTLINK) $(NO_WARN) -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_LOAD_EXTENSION \
		$(TOP)\tool\index_usage.c $(SQLITE3C) /link $(LDFLAGS) $(LTLINKOPTS)

changeset.exe:	$(TOP)\ext\session\changeset.c $(SQLITE3C) $(SQLITE3H)
	$(LTLINK) $(NO_WARN) -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_LOAD_EXTENSION \
		-DSQLITE_ENABLE_SESSION=1 -DSQLITE_ENABLE_PREUPDATE_HOOK=1 \
		$(TOP)\ext\session\changeset.c $(SQLITE3C) /link $(LDFLAGS) $(LTLINKOPTS)

changesetfuzz.exe:	$(TOP)\ext\session\changesetfuzz.c $(SQLITE3C) $(SQLITE3H)
	$(LTLINK) $(NO_WARN) -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_LOAD_EXTENSION \
Changes to autoconf/Makefile.msc.
279
280
281
282
283
284
285

286
287
288
289
290
291
292
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_RTREE=1
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_GEOPOLY=1
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_JSON1=1
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_STMTVTAB=1
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_DBPAGE_VTAB=1
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_DBSTAT_VTAB=1
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_INTROSPECTION_PRAGMAS=1

!ENDIF
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_COLUMN_METADATA=1
!ENDIF

# Should the session extension be enabled?  If so, add compilation options
# to enable it.
#







>







279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_RTREE=1
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_GEOPOLY=1
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_JSON1=1
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_STMTVTAB=1
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_DBPAGE_VTAB=1
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_DBSTAT_VTAB=1
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_INTROSPECTION_PRAGMAS=1
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_DESERIALIZE=1
!ENDIF
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_COLUMN_METADATA=1
!ENDIF

# Should the session extension be enabled?  If so, add compilation options
# to enable it.
#
933
934
935
936
937
938
939

940
941
942
943
944
945
946
# Additional compiler options for the shell.  These are only effective
# when the shell is not being dynamically linked.
#
!IF $(DYNAMIC_SHELL)==0 && $(FOR_WIN10)==0
SHELL_COMPILE_OPTS = $(SHELL_COMPILE_OPTS) -DSQLITE_ENABLE_FTS4=1
SHELL_COMPILE_OPTS = $(SHELL_COMPILE_OPTS) -DSQLITE_ENABLE_EXPLAIN_COMMENTS=1
SHELL_COMPILE_OPTS = $(SHELL_COMPILE_OPTS) -DSQLITE_ENABLE_OFFSET_SQL_FUNC=1

!ENDIF


# This is the default Makefile target.  The objects listed here
# are what get build when you type just "make" with no arguments.
#
core:	dll shell







>







934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
# Additional compiler options for the shell.  These are only effective
# when the shell is not being dynamically linked.
#
!IF $(DYNAMIC_SHELL)==0 && $(FOR_WIN10)==0
SHELL_COMPILE_OPTS = $(SHELL_COMPILE_OPTS) -DSQLITE_ENABLE_FTS4=1
SHELL_COMPILE_OPTS = $(SHELL_COMPILE_OPTS) -DSQLITE_ENABLE_EXPLAIN_COMMENTS=1
SHELL_COMPILE_OPTS = $(SHELL_COMPILE_OPTS) -DSQLITE_ENABLE_OFFSET_SQL_FUNC=1
SHELL_COMPILE_OPTS = $(SHELL_COMPILE_OPTS) -DSQLITE_ENABLE_DESERIALIZE=1
!ENDIF


# This is the default Makefile target.  The objects listed here
# are what get build when you type just "make" with no arguments.
#
core:	dll shell
Changes to ext/fts3/fts3_unicode.c.
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#endif /* ifndef SQLITE_AMALGAMATION */

typedef struct unicode_tokenizer unicode_tokenizer;
typedef struct unicode_cursor unicode_cursor;

struct unicode_tokenizer {
  sqlite3_tokenizer base;
  int bRemoveDiacritic;
  int nException;
  int *aiException;
};

struct unicode_cursor {
  sqlite3_tokenizer_cursor base;
  const unsigned char *aInput;    /* Input text being tokenized */







|







78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#endif /* ifndef SQLITE_AMALGAMATION */

typedef struct unicode_tokenizer unicode_tokenizer;
typedef struct unicode_cursor unicode_cursor;

struct unicode_tokenizer {
  sqlite3_tokenizer base;
  int eRemoveDiacritic;
  int nException;
  int *aiException;
};

struct unicode_cursor {
  sqlite3_tokenizer_cursor base;
  const unsigned char *aInput;    /* Input text being tokenized */
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240



241
242
243
244
245
246
247
  unicode_tokenizer *pNew;        /* New tokenizer object */
  int i;
  int rc = SQLITE_OK;

  pNew = (unicode_tokenizer *) sqlite3_malloc(sizeof(unicode_tokenizer));
  if( pNew==NULL ) return SQLITE_NOMEM;
  memset(pNew, 0, sizeof(unicode_tokenizer));
  pNew->bRemoveDiacritic = 1;

  for(i=0; rc==SQLITE_OK && i<nArg; i++){
    const char *z = azArg[i];
    int n = (int)strlen(z);

    if( n==19 && memcmp("remove_diacritics=1", z, 19)==0 ){
      pNew->bRemoveDiacritic = 1;
    }
    else if( n==19 && memcmp("remove_diacritics=0", z, 19)==0 ){
      pNew->bRemoveDiacritic = 0;



    }
    else if( n>=11 && memcmp("tokenchars=", z, 11)==0 ){
      rc = unicodeAddExceptions(pNew, 1, &z[11], n-11);
    }
    else if( n>=11 && memcmp("separators=", z, 11)==0 ){
      rc = unicodeAddExceptions(pNew, 0, &z[11], n-11);
    }







|






|


|
>
>
>







223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
  unicode_tokenizer *pNew;        /* New tokenizer object */
  int i;
  int rc = SQLITE_OK;

  pNew = (unicode_tokenizer *) sqlite3_malloc(sizeof(unicode_tokenizer));
  if( pNew==NULL ) return SQLITE_NOMEM;
  memset(pNew, 0, sizeof(unicode_tokenizer));
  pNew->eRemoveDiacritic = 1;

  for(i=0; rc==SQLITE_OK && i<nArg; i++){
    const char *z = azArg[i];
    int n = (int)strlen(z);

    if( n==19 && memcmp("remove_diacritics=1", z, 19)==0 ){
      pNew->eRemoveDiacritic = 1;
    }
    else if( n==19 && memcmp("remove_diacritics=0", z, 19)==0 ){
      pNew->eRemoveDiacritic = 0;
    }
    else if( n==19 && memcmp("remove_diacritics=2", z, 19)==0 ){
      pNew->eRemoveDiacritic = 2;
    }
    else if( n>=11 && memcmp("tokenchars=", z, 11)==0 ){
      rc = unicodeAddExceptions(pNew, 1, &z[11], n-11);
    }
    else if( n>=11 && memcmp("separators=", z, 11)==0 ){
      rc = unicodeAddExceptions(pNew, 0, &z[11], n-11);
    }
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
      zOut = &zNew[zOut - pCsr->zToken];
      pCsr->zToken = zNew;
      pCsr->nAlloc += 64;
    }

    /* Write the folded case of the last character read to the output */
    zEnd = z;
    iOut = sqlite3FtsUnicodeFold((int)iCode, p->bRemoveDiacritic);
    if( iOut ){
      WRITE_UTF8(zOut, iOut);
    }

    /* If the cursor is not at EOF, read the next character */
    if( z>=zTerm ) break;
    READ_UTF8(z, zTerm, iCode);







|







349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
      zOut = &zNew[zOut - pCsr->zToken];
      pCsr->zToken = zNew;
      pCsr->nAlloc += 64;
    }

    /* Write the folded case of the last character read to the output */
    zEnd = z;
    iOut = sqlite3FtsUnicodeFold((int)iCode, p->eRemoveDiacritic);
    if( iOut ){
      WRITE_UTF8(zOut, iOut);
    }

    /* If the cursor is not at EOF, read the next character */
    if( z>=zTerm ) break;
    READ_UTF8(z, zTerm, iCode);
Changes to ext/fts3/fts3_unicode2.c.
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170

171
172

173
174
175
176

177
178
179
180
181
182
183
184
185
186
187















188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203

204
205
206
207
208
209
210
211
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER
** E"). The resuls of passing a codepoint that corresponds to an
** uppercase letter are undefined.
*/
static int remove_diacritic(int c){
  unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995, 
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286, 
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732, 
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336, 
     3456,  3696,  3712,  3728,  3744,  3896,  3912,  3928, 
     3968,  4008,  4040,  4106,  4138,  4170,  4202,  4234, 
     4266,  4296,  4312,  4344,  4408,  4424,  4472,  4504, 

     6148,  6198,  6264,  6280,  6360,  6429,  6505,  6529, 
    61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, 

    61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, 
    62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, 
    62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, 
    62924, 63050, 63082, 63274, 63390, 

  };
  char aChar[] = {
    '\0', 'a',  'c',  'e',  'i',  'n',  'o',  'u',  'y',  'y',  'a',  'c',  
    'd',  'e',  'e',  'g',  'h',  'i',  'j',  'k',  'l',  'n',  'o',  'r',  
    's',  't',  'u',  'u',  'w',  'y',  'z',  'o',  'u',  'a',  'i',  'o',  
    'u',  'g',  'k',  'o',  'j',  'g',  'n',  'a',  'e',  'i',  'o',  'r',  
    'u',  's',  't',  'h',  'a',  'e',  'o',  'y',  '\0', '\0', '\0', '\0', 
    '\0', '\0', '\0', '\0', 'a',  'b',  'd',  'd',  'e',  'f',  'g',  'h',  
    'h',  'i',  'k',  'l',  'l',  'm',  'n',  'p',  'r',  'r',  's',  't',  
    'u',  'v',  'w',  'w',  'x',  'y',  'z',  'h',  't',  'w',  'y',  'a',  
    'e',  'i',  'o',  'u',  'y',  















  };

  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );

  return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
}


/*
** Return true if the argument interpreted as a unicode codepoint
** is a diacritical modifier character.
*/







|





|
|
|
>
|
|
>
|
|
|
|
>


<
<
<
|
|
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
















>
|







155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181



182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER
** E"). The resuls of passing a codepoint that corresponds to an
** uppercase letter are undefined.
*/
static int remove_diacritic(int c, int bComplex){
  unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995, 
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286, 
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732, 
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336, 
     3456,  3696,  3712,  3728,  3744,  3766,  3832,  3896, 
     3912,  3928,  3944,  3968,  4008,  4040,  4056,  4106, 
     4138,  4170,  4202,  4234,  4266,  4296,  4312,  4344, 
     4408,  4424,  4442,  4472,  4488,  4504,  6148,  6198, 
     6264,  6280,  6360,  6429,  6505,  6529, 61448, 61468, 
    61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704, 
    61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914, 
    61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218, 
    62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554, 
    62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766, 
    62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118, 
    63182, 63242, 63274, 63310, 63368, 63390, 
  };
  char aChar[] = {



    '\0',      'a'|0x00,  'c'|0x00,  'e'|0x00,  'i'|0x00,  'n'|0x00,  
    'o'|0x00,  'u'|0x00,  'y'|0x00,  'y'|0x00,  'a'|0x00,  'c'|0x00,  
    'd'|0x00,  'e'|0x00,  'e'|0x00,  'g'|0x00,  'h'|0x00,  'i'|0x00,  
    'j'|0x00,  'k'|0x00,  'l'|0x00,  'n'|0x00,  'o'|0x00,  'r'|0x00,  
    's'|0x00,  't'|0x00,  'u'|0x00,  'u'|0x00,  'w'|0x00,  'y'|0x00,  
    'z'|0x00,  'o'|0x00,  'u'|0x00,  'a'|0x00,  'i'|0x00,  'o'|0x00,  
    'u'|0x00,  'u'|0x80,  'a'|0x80,  'g'|0x00,  'k'|0x00,  'o'|0x00,  
    'o'|0x80,  'j'|0x00,  'g'|0x00,  'n'|0x00,  'a'|0x80,  'a'|0x00,  
    'e'|0x00,  'i'|0x00,  'o'|0x00,  'r'|0x00,  'u'|0x00,  's'|0x00,  
    't'|0x00,  'h'|0x00,  'a'|0x00,  'e'|0x00,  'o'|0x80,  'o'|0x00,  
    'o'|0x80,  'y'|0x00,  '\0',      '\0',      '\0',      '\0',      
    '\0',      '\0',      '\0',      '\0',      'a'|0x00,  'b'|0x00,  
    'c'|0x80,  'd'|0x00,  'd'|0x00,  'e'|0x80,  'e'|0x00,  'e'|0x80,  
    'f'|0x00,  'g'|0x00,  'h'|0x00,  'h'|0x00,  'i'|0x00,  'i'|0x80,  
    'k'|0x00,  'l'|0x00,  'l'|0x80,  'l'|0x00,  'm'|0x00,  'n'|0x00,  
    'o'|0x80,  'p'|0x00,  'r'|0x00,  'r'|0x80,  'r'|0x00,  's'|0x00,  
    's'|0x80,  't'|0x00,  'u'|0x00,  'u'|0x80,  'v'|0x00,  'w'|0x00,  
    'w'|0x00,  'x'|0x00,  'y'|0x00,  'z'|0x00,  'h'|0x00,  't'|0x00,  
    'w'|0x00,  'y'|0x00,  'a'|0x00,  'a'|0x80,  'a'|0x80,  'a'|0x80,  
    'e'|0x00,  'e'|0x80,  'e'|0x80,  'i'|0x00,  'o'|0x00,  'o'|0x80,  
    'o'|0x80,  'o'|0x80,  'u'|0x00,  'u'|0x80,  'u'|0x80,  'y'|0x00,  
  };

  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );
  if( bComplex==0 && (aChar[iRes] & 0x80) ) return c;
  return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F);
}


/*
** Return true if the argument interpreted as a unicode codepoint
** is a diacritical modifier character.
*/
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
** Otherwise, return a copy of the argument.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){
  /* Each entry in the following array defines a rule for folding a range
  ** of codepoints to lower case. The rule applies to a range of nRange
  ** codepoints starting at codepoint iCode.
  **
  ** If the least significant bit in flags is clear, then the rule applies
  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
  ** need to be folded). Or, if it is set, then the rule only applies to







|







240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
** Otherwise, return a copy of the argument.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int sqlite3FtsUnicodeFold(int c, int eRemoveDiacritic){
  /* Each entry in the following array defines a rule for folding a range
  ** of codepoints to lower case. The rule applies to a range of nRange
  ** codepoints starting at codepoint iCode.
  **
  ** If the least significant bit in flags is clear, then the rule applies
  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
  ** need to be folded). Or, if it is set, then the rule only applies to
347
348
349
350
351
352
353

354

355
356
357
358
359
360
361
362
363
364
    assert( iRes>=0 && c>=aEntry[iRes].iCode );
    p = &aEntry[iRes];
    if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
      ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
      assert( ret>0 );
    }


    if( bRemoveDiacritic ) ret = remove_diacritic(ret);

  }
  
  else if( c>=66560 && c<66600 ){
    ret = c + 40;
  }

  return ret;
}
#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */
#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */







>
|
>










363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
    assert( iRes>=0 && c>=aEntry[iRes].iCode );
    p = &aEntry[iRes];
    if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
      ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
      assert( ret>0 );
    }

    if( eRemoveDiacritic ){
      ret = remove_diacritic(ret, eRemoveDiacritic==2);
    }
  }
  
  else if( c>=66560 && c<66600 ){
    ret = c + 40;
  }

  return ret;
}
#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */
#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */
Changes to ext/fts3/unicode/mkunicode.tcl.
1
2
3
4
5
6
7
8
9
10
11

12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31

32
33
34

35
36
37
38

39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65

66



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89

90
91
92
93
94
95
96
97
98

99
100
101
102
103
104
105

source [file join [file dirname [info script]] parseunicode.tcl]

proc print_rd {map} {
  global tl_lookup_table
  set aChar [list]
  set lRange [list]

  set nRange 1
  set iFirst  [lindex $map 0 0]
  set cPrev   [lindex $map 0 1]


  foreach m [lrange $map 1 end] {
    foreach {i c} $m {}

    if {$cPrev == $c} {
      for {set j [expr $iFirst+$nRange]} {$j<$i} {incr j} {
        if {[info exists tl_lookup_table($j)]==0} break
      }

      if {$j==$i} {
        set nNew [expr {(1 + $i - $iFirst)}]
        if {$nNew<=8} {
          set nRange $nNew
          continue
        }
      }
    }

    lappend lRange [list $iFirst $nRange]
    lappend aChar  $cPrev


    set iFirst $i
    set cPrev  $c

    set nRange 1
  }
  lappend lRange [list $iFirst $nRange]
  lappend aChar $cPrev


  puts "/*"
  puts "** If the argument is a codepoint corresponding to a lowercase letter"
  puts "** in the ASCII range with a diacritic added, return the codepoint"
  puts "** of the ASCII letter only. For example, if passed 235 - \"LATIN"
  puts "** SMALL LETTER E WITH DIAERESIS\" - return 65 (\"LATIN SMALL LETTER"
  puts "** E\"). The resuls of passing a codepoint that corresponds to an"
  puts "** uppercase letter are undefined."
  puts "*/"
  puts "static int ${::remove_diacritic}(int c)\{"
  puts "  unsigned short aDia\[\] = \{"
  puts -nonewline "        0, "
  set i 1
  foreach r $lRange {
    foreach {iCode nRange} $r {}
    if {($i % 8)==0} {puts "" ; puts -nonewline "    " }
    incr i

    puts -nonewline [format "%5d" [expr ($iCode<<3) + $nRange-1]]
    puts -nonewline ", "
  }
  puts ""
  puts "  \};"
  puts "  char aChar\[\] = \{"
  puts -nonewline "    '\\0', "
  set i 1
  foreach c $aChar {

    set str "'$c',  "



    if {$c == ""} { set str "'\\0', " }

    if {($i % 12)==0} {puts "" ; puts -nonewline "    " }
    incr i
    puts -nonewline "$str"
  }
  puts ""
  puts "  \};"
  puts {
  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );

  return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);}
  puts "\}"
}

proc print_isdiacritic {zFunc map} {

  set lCode [list]
  foreach m $map {
    foreach {code char} $m {}

    if {$code && $char == ""} { lappend lCode $code }
  }
  set lCode [lsort -integer $lCode]
  set iFirst [lindex $lCode 0]
  set iLast [lindex $lCode end]

  set i1 0











>


|

|















>



>




>









|














|

|
>
|
>
>
>
|

|




















>
|







|
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115

source [file join [file dirname [info script]] parseunicode.tcl]

proc print_rd {map} {
  global tl_lookup_table
  set aChar [list]
  set lRange [list]

  set nRange 1
  set iFirst  [lindex $map 0 0]
  set cPrev   [lindex $map 0 1]
  set fPrev   [lindex $map 0 2]

  foreach m [lrange $map 1 end] {
    foreach {i c f} $m {}

    if {$cPrev == $c && $fPrev==$f} {
      for {set j [expr $iFirst+$nRange]} {$j<$i} {incr j} {
        if {[info exists tl_lookup_table($j)]==0} break
      }

      if {$j==$i} {
        set nNew [expr {(1 + $i - $iFirst)}]
        if {$nNew<=8} {
          set nRange $nNew
          continue
        }
      }
    }

    lappend lRange [list $iFirst $nRange]
    lappend aChar  $cPrev
    lappend aFlag  $fPrev

    set iFirst $i
    set cPrev  $c
    set fPrev  $f
    set nRange 1
  }
  lappend lRange [list $iFirst $nRange]
  lappend aChar $cPrev
  lappend aFlag $fPrev

  puts "/*"
  puts "** If the argument is a codepoint corresponding to a lowercase letter"
  puts "** in the ASCII range with a diacritic added, return the codepoint"
  puts "** of the ASCII letter only. For example, if passed 235 - \"LATIN"
  puts "** SMALL LETTER E WITH DIAERESIS\" - return 65 (\"LATIN SMALL LETTER"
  puts "** E\"). The resuls of passing a codepoint that corresponds to an"
  puts "** uppercase letter are undefined."
  puts "*/"
  puts "static int ${::remove_diacritic}(int c, int bComplex)\{"
  puts "  unsigned short aDia\[\] = \{"
  puts -nonewline "        0, "
  set i 1
  foreach r $lRange {
    foreach {iCode nRange} $r {}
    if {($i % 8)==0} {puts "" ; puts -nonewline "    " }
    incr i

    puts -nonewline [format "%5d" [expr ($iCode<<3) + $nRange-1]]
    puts -nonewline ", "
  }
  puts ""
  puts "  \};"
  puts "  char aChar\[\] = \{"
  puts -nonewline "    '\\0',      "
  set i 1
  foreach c $aChar f $aFlag {
    if { $f } {
      set str "'$c'|0x80,  "
    } else {
      set str "'$c'|0x00,  "
    }
    if {$c == ""} { set str "'\\0',      " }

    if {($i % 6)==0} {puts "" ; puts -nonewline "    " }
    incr i
    puts -nonewline "$str"
  }
  puts ""
  puts "  \};"
  puts {
  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );
  if( bComplex==0 && (aChar[iRes] & 0x80) ) return c;
  return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F);}
  puts "\}"
}

proc print_isdiacritic {zFunc map} {

  set lCode [list]
  foreach m $map {
    foreach {code char flag} $m {}
    if {$flag} continue
    if {$code && $char == ""} { lappend lCode $code }
  }
  set lCode [lsort -integer $lCode]
  set iFirst [lindex $lCode 0]
  set iLast [lindex $lCode end]

  set i1 0
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
  puts "** is an upper case character that has a lower case equivalent,"
  puts "** return the codepoint corresponding to the lower case version."
  puts "** Otherwise, return a copy of the argument."
  puts "**"
  puts "** The results are undefined if the value passed to this function"
  puts "** is less than zero."
  puts "*/"
  puts "int ${zFunc}\(int c, int bRemoveDiacritic)\{"

  set liOff [tl_generate_ioff_table $lRecord]
  tl_print_table_header
  foreach entry $lRecord { 
    if {[tl_print_table_entry toggle $entry $liOff]} { 
      lappend lHigh $entry 
    } 







|







478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
  puts "** is an upper case character that has a lower case equivalent,"
  puts "** return the codepoint corresponding to the lower case version."
  puts "** Otherwise, return a copy of the argument."
  puts "**"
  puts "** The results are undefined if the value passed to this function"
  puts "** is less than zero."
  puts "*/"
  puts "int ${zFunc}\(int c, int eRemoveDiacritic)\{"

  set liOff [tl_generate_ioff_table $lRecord]
  tl_print_table_header
  foreach entry $lRecord { 
    if {[tl_print_table_entry toggle $entry $liOff]} { 
      lappend lHigh $entry 
    } 
512
513
514
515
516
517
518

519

520
521
522
523
524
525
526
    assert( iRes>=0 && c>=aEntry[iRes].iCode );
    p = &aEntry[iRes];
    if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
      ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
      assert( ret>0 );
    }


    if( bRemoveDiacritic ) ret = ${::remove_diacritic}(ret);

  }
  }]

  foreach entry $lHigh {
    tl_print_if_entry $entry
  }








>
|
>







522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
    assert( iRes>=0 && c>=aEntry[iRes].iCode );
    p = &aEntry[iRes];
    if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
      ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
      assert( ret>0 );
    }

    if( eRemoveDiacritic ){
      ret = ${::remove_diacritic}(ret, eRemoveDiacritic==2);
    }
  }
  }]

  foreach entry $lHigh {
    tl_print_if_entry $entry
  }

601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
  set caseN [categories_switch C N {d l o}]
  set caseP [categories_switch C P {c d e f i o s}]
  set caseS [categories_switch C S {c k m o}]
  set caseZ [categories_switch C Z {l p s}]

  set nCat [expr [llength [array names C]] + 1]
  puts [code {
    int sqlite3Fts5UnicodeNCat(void) { 
      return $nCat;
    }

    int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ 
      aArray[0] = 1;
      switch( zCat[0] ){
        $caseC
        $caseL
        $caseM
        $caseN







<
<
<
<







613
614
615
616
617
618
619




620
621
622
623
624
625
626
  set caseN [categories_switch C N {d l o}]
  set caseP [categories_switch C P {c d e f i o s}]
  set caseS [categories_switch C S {c k m o}]
  set caseZ [categories_switch C Z {l p s}]

  set nCat [expr [llength [array names C]] + 1]
  puts [code {




    int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ 
      aArray[0] = 1;
      switch( zCat[0] ){
        $caseC
        $caseL
        $caseM
        $caseN
Changes to ext/fts3/unicode/parseunicode.tcl.
1
2
3
4
5
6
7
8
9
10
11
12
13
14












15
16
17
18
19
20
21

#--------------------------------------------------------------------------
# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of mappings required to remove all
# diacritical marks from a unicode string. Each mapping is itself a list
# consisting of two elements - the unicode codepoint and the single ASCII
# character that it should be replaced with, or an empty string if the 
# codepoint should simply be removed from the input. Examples:
#
#   { 224 a  }     (replace codepoint 224 to "a")
#   { 769 "" }     (remove codepoint 769 from input)
#
# Mappings are only returned for non-upper case codepoints. It is assumed
# that the input has already been folded to lower case.












#
proc rd_load_unicodedata_text {zName} {
  global tl_lookup_table

  set fd [open $zName]
  set lField {
    code









|
|



>
>
>
>
>
>
>
>
>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33

#--------------------------------------------------------------------------
# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of mappings required to remove all
# diacritical marks from a unicode string. Each mapping is itself a list
# consisting of two elements - the unicode codepoint and the single ASCII
# character that it should be replaced with, or an empty string if the 
# codepoint should simply be removed from the input. Examples:
#
#   { 224 a  0 }     (replace codepoint 224 to "a")
#   { 769 "" 0 }     (remove codepoint 769 from input)
#
# Mappings are only returned for non-upper case codepoints. It is assumed
# that the input has already been folded to lower case.
#
# The third value in the list is always either 0 or 1. 0 if the 
# UnicodeData.txt file maps the codepoint to a single ASCII character and
# a diacritic, or 1 if the mapping is indirect. For example, consider the 
# two entries:
#
# 1ECD;LATIN SMALL LETTER O WITH DOT BELOW;Ll;0;L;006F 0323;;;;N;;;1ECC;;1ECC
# 1ED9;LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW;Ll;0;L;1ECD 0302;;;;N;;;1ED8;;1ED8
#
# The first codepoint is a direct mapping (as 006F is ASCII and 0323 is a 
# diacritic). The second is an indirect mapping, as it maps to the
# first codepoint plus 0302 (a diacritic).
#
proc rd_load_unicodedata_text {zName} {
  global tl_lookup_table

  set fd [open $zName]
  set lField {
    code
49
50
51
52
53
54
55


56








57
58
59
60
61

62
63
64
65
66
67
68
69
70
71
72
73
74
      continue
    }

    set iCode  [expr "0x$code"]
    set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]
    set iDia   [expr "0x[lindex $character_decomposition_mapping 1]"]



    if {[info exists tl_lookup_table($iCode)]} continue









    if { ($iAscii >= 97 && $iAscii <= 122)
      || ($iAscii >= 65 && $iAscii <= 90)
    } {
      lappend lRet [list $iCode [string tolower [format %c $iAscii]]]

      set dia($iDia) 1
    }
  }

  foreach d [array names dia] {
    lappend lRet [list $d ""]
  }
  set lRet [lsort -integer -index 0 $lRet]

  close $fd
  set lRet
}








>
>

>
>
>
>
>
>
>
>




|
>





|







61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
      continue
    }

    set iCode  [expr "0x$code"]
    set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]
    set iDia   [expr "0x[lindex $character_decomposition_mapping 1]"]

    # Filter out upper-case characters, as they will be mapped to their
    # lower-case equivalents before this data is used.
    if {[info exists tl_lookup_table($iCode)]} continue

    # Check if this is an indirect mapping. If so, set bIndirect to true
    # and change $iAscii to the indirectly mappped ASCII character.
    set bIndirect 0
    if {[info exists dia($iDia)] && [info exists mapping($iAscii)]} {
      set iAscii $mapping($iAscii)
      set bIndirect 1
    }

    if { ($iAscii >= 97 && $iAscii <= 122)
      || ($iAscii >= 65 && $iAscii <= 90)
    } {
      lappend lRet [list $iCode [string tolower [format %c $iAscii]] $bIndirect]
      set mapping($iCode) $iAscii
      set dia($iDia) 1
    }
  }

  foreach d [array names dia] {
    lappend lRet [list $d "" 0]
  }
  set lRet [lsort -integer -index 0 $lRet]

  close $fd
  set lRet
}

Changes to ext/fts5/fts5_tokenize.c.
230
231
232
233
234
235
236
237
238
239
240
241
242





243
244
245
246
247
248
249
#endif /* ifndef SQLITE_AMALGAMATION */

typedef struct Unicode61Tokenizer Unicode61Tokenizer;
struct Unicode61Tokenizer {
  unsigned char aTokenChar[128];  /* ASCII range token characters */
  char *aFold;                    /* Buffer to fold text into */
  int nFold;                      /* Size of aFold[] in bytes */
  int bRemoveDiacritic;           /* True if remove_diacritics=1 is set */
  int nException;
  int *aiException;

  unsigned char aCategory[32];    /* True for token char categories */
};






static int fts5UnicodeAddExceptions(
  Unicode61Tokenizer *p,          /* Tokenizer object */
  const char *z,                  /* Characters to treat as exceptions */
  int bTokenChars                 /* 1 for 'tokenchars', 0 for 'separators' */
){
  int rc = SQLITE_OK;







|





>
>
>
>
>







230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
#endif /* ifndef SQLITE_AMALGAMATION */

typedef struct Unicode61Tokenizer Unicode61Tokenizer;
struct Unicode61Tokenizer {
  unsigned char aTokenChar[128];  /* ASCII range token characters */
  char *aFold;                    /* Buffer to fold text into */
  int nFold;                      /* Size of aFold[] in bytes */
  int eRemoveDiacritic;           /* True if remove_diacritics=1 is set */
  int nException;
  int *aiException;

  unsigned char aCategory[32];    /* True for token char categories */
};

/* Values for eRemoveDiacritic (must match internals of fts5_unicode2.c) */
#define FTS5_REMOVE_DIACRITICS_NONE    0
#define FTS5_REMOVE_DIACRITICS_SIMPLE  1
#define FTS5_REMOVE_DIACRITICS_COMPLEX 2

static int fts5UnicodeAddExceptions(
  Unicode61Tokenizer *p,          /* Tokenizer object */
  const char *z,                  /* Characters to treat as exceptions */
  int bTokenChars                 /* 1 for 'tokenchars', 0 for 'separators' */
){
  int rc = SQLITE_OK;
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386






387
388
389
390
391
392
393
394
395
  }else{
    p = (Unicode61Tokenizer*)sqlite3_malloc(sizeof(Unicode61Tokenizer));
    if( p ){
      const char *zCat = "L* N* Co";
      int i;
      memset(p, 0, sizeof(Unicode61Tokenizer));

      p->bRemoveDiacritic = 1;
      p->nFold = 64;
      p->aFold = sqlite3_malloc(p->nFold * sizeof(char));
      if( p->aFold==0 ){
        rc = SQLITE_NOMEM;
      }

      /* Search for a "categories" argument */
      for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
        if( 0==sqlite3_stricmp(azArg[i], "categories") ){
          zCat = azArg[i+1];
        }
      }

      if( rc==SQLITE_OK ){
        rc = unicodeSetCategories(p, zCat);
      }

      for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
        const char *zArg = azArg[i+1];
        if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
          if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){
            rc = SQLITE_ERROR;






          }
          p->bRemoveDiacritic = (zArg[0]=='1');
        }else
        if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
          rc = fts5UnicodeAddExceptions(p, zArg, 1);
        }else
        if( 0==sqlite3_stricmp(azArg[i], "separators") ){
          rc = fts5UnicodeAddExceptions(p, zArg, 0);
        }else







|




















|

>
>
>
>
>
>

<







362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398

399
400
401
402
403
404
405
  }else{
    p = (Unicode61Tokenizer*)sqlite3_malloc(sizeof(Unicode61Tokenizer));
    if( p ){
      const char *zCat = "L* N* Co";
      int i;
      memset(p, 0, sizeof(Unicode61Tokenizer));

      p->eRemoveDiacritic = FTS5_REMOVE_DIACRITICS_SIMPLE;
      p->nFold = 64;
      p->aFold = sqlite3_malloc(p->nFold * sizeof(char));
      if( p->aFold==0 ){
        rc = SQLITE_NOMEM;
      }

      /* Search for a "categories" argument */
      for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
        if( 0==sqlite3_stricmp(azArg[i], "categories") ){
          zCat = azArg[i+1];
        }
      }

      if( rc==SQLITE_OK ){
        rc = unicodeSetCategories(p, zCat);
      }

      for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
        const char *zArg = azArg[i+1];
        if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
          if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){
            rc = SQLITE_ERROR;
          }else{
            p->eRemoveDiacritic = (zArg[0] - '0');
            assert( p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_NONE
                 || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_SIMPLE
                 || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_COMPLEX
            );
          }

        }else
        if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
          rc = fts5UnicodeAddExceptions(p, zArg, 1);
        }else
        if( 0==sqlite3_stricmp(azArg[i], "separators") ){
          rc = fts5UnicodeAddExceptions(p, zArg, 0);
        }else
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509

      if( *zCsr & 0x80 ){
        /* An non-ascii-range character. Fold it into the output buffer if
        ** it is a token character, or break out of the loop if it is not. */
        READ_UTF8(zCsr, zTerm, iCode);
        if( fts5UnicodeIsAlnum(p,iCode)||sqlite3Fts5UnicodeIsdiacritic(iCode) ){
 non_ascii_tokenchar:
          iCode = sqlite3Fts5UnicodeFold(iCode, p->bRemoveDiacritic);
          if( iCode ) WRITE_UTF8(zOut, iCode);
        }else{
          break;
        }
      }else if( a[*zCsr]==0 ){
        /* An ascii-range separator character. End of token. */
        break; 







|







505
506
507
508
509
510
511
512
513
514
515
516
517
518
519

      if( *zCsr & 0x80 ){
        /* An non-ascii-range character. Fold it into the output buffer if
        ** it is a token character, or break out of the loop if it is not. */
        READ_UTF8(zCsr, zTerm, iCode);
        if( fts5UnicodeIsAlnum(p,iCode)||sqlite3Fts5UnicodeIsdiacritic(iCode) ){
 non_ascii_tokenchar:
          iCode = sqlite3Fts5UnicodeFold(iCode, p->eRemoveDiacritic);
          if( iCode ) WRITE_UTF8(zOut, iCode);
        }else{
          break;
        }
      }else if( a[*zCsr]==0 ){
        /* An ascii-range separator character. End of token. */
        break; 
Changes to ext/fts5/fts5_unicode2.c.
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39

40
41

42
43
44
45

46
47
48
49
50
51
52
53
54
55
56















57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72

73
74
75
76
77
78
79
80
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER
** E"). The resuls of passing a codepoint that corresponds to an
** uppercase letter are undefined.
*/
static int fts5_remove_diacritic(int c){
  unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995, 
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286, 
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732, 
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336, 
     3456,  3696,  3712,  3728,  3744,  3896,  3912,  3928, 
     3968,  4008,  4040,  4106,  4138,  4170,  4202,  4234, 
     4266,  4296,  4312,  4344,  4408,  4424,  4472,  4504, 

     6148,  6198,  6264,  6280,  6360,  6429,  6505,  6529, 
    61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, 

    61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, 
    62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, 
    62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, 
    62924, 63050, 63082, 63274, 63390, 

  };
  char aChar[] = {
    '\0', 'a',  'c',  'e',  'i',  'n',  'o',  'u',  'y',  'y',  'a',  'c',  
    'd',  'e',  'e',  'g',  'h',  'i',  'j',  'k',  'l',  'n',  'o',  'r',  
    's',  't',  'u',  'u',  'w',  'y',  'z',  'o',  'u',  'a',  'i',  'o',  
    'u',  'g',  'k',  'o',  'j',  'g',  'n',  'a',  'e',  'i',  'o',  'r',  
    'u',  's',  't',  'h',  'a',  'e',  'o',  'y',  '\0', '\0', '\0', '\0', 
    '\0', '\0', '\0', '\0', 'a',  'b',  'd',  'd',  'e',  'f',  'g',  'h',  
    'h',  'i',  'k',  'l',  'l',  'm',  'n',  'p',  'r',  'r',  's',  't',  
    'u',  'v',  'w',  'w',  'x',  'y',  'z',  'h',  't',  'w',  'y',  'a',  
    'e',  'i',  'o',  'u',  'y',  















  };

  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );

  return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
}


/*
** Return true if the argument interpreted as a unicode codepoint
** is a diacritical modifier character.
*/







|





|
|
|
>
|
|
>
|
|
|
|
>


<
<
<
|
|
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
















>
|







24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER
** E"). The resuls of passing a codepoint that corresponds to an
** uppercase letter are undefined.
*/
static int fts5_remove_diacritic(int c, int bComplex){
  unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995, 
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286, 
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732, 
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336, 
     3456,  3696,  3712,  3728,  3744,  3766,  3832,  3896, 
     3912,  3928,  3944,  3968,  4008,  4040,  4056,  4106, 
     4138,  4170,  4202,  4234,  4266,  4296,  4312,  4344, 
     4408,  4424,  4442,  4472,  4488,  4504,  6148,  6198, 
     6264,  6280,  6360,  6429,  6505,  6529, 61448, 61468, 
    61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704, 
    61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914, 
    61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218, 
    62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554, 
    62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766, 
    62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118, 
    63182, 63242, 63274, 63310, 63368, 63390, 
  };
  char aChar[] = {



    '\0',      'a'|0x00,  'c'|0x00,  'e'|0x00,  'i'|0x00,  'n'|0x00,  
    'o'|0x00,  'u'|0x00,  'y'|0x00,  'y'|0x00,  'a'|0x00,  'c'|0x00,  
    'd'|0x00,  'e'|0x00,  'e'|0x00,  'g'|0x00,  'h'|0x00,  'i'|0x00,  
    'j'|0x00,  'k'|0x00,  'l'|0x00,  'n'|0x00,  'o'|0x00,  'r'|0x00,  
    's'|0x00,  't'|0x00,  'u'|0x00,  'u'|0x00,  'w'|0x00,  'y'|0x00,  
    'z'|0x00,  'o'|0x00,  'u'|0x00,  'a'|0x00,  'i'|0x00,  'o'|0x00,  
    'u'|0x00,  'u'|0x80,  'a'|0x80,  'g'|0x00,  'k'|0x00,  'o'|0x00,  
    'o'|0x80,  'j'|0x00,  'g'|0x00,  'n'|0x00,  'a'|0x80,  'a'|0x00,  
    'e'|0x00,  'i'|0x00,  'o'|0x00,  'r'|0x00,  'u'|0x00,  's'|0x00,  
    't'|0x00,  'h'|0x00,  'a'|0x00,  'e'|0x00,  'o'|0x80,  'o'|0x00,  
    'o'|0x80,  'y'|0x00,  '\0',      '\0',      '\0',      '\0',      
    '\0',      '\0',      '\0',      '\0',      'a'|0x00,  'b'|0x00,  
    'c'|0x80,  'd'|0x00,  'd'|0x00,  'e'|0x80,  'e'|0x00,  'e'|0x80,  
    'f'|0x00,  'g'|0x00,  'h'|0x00,  'h'|0x00,  'i'|0x00,  'i'|0x80,  
    'k'|0x00,  'l'|0x00,  'l'|0x80,  'l'|0x00,  'm'|0x00,  'n'|0x00,  
    'o'|0x80,  'p'|0x00,  'r'|0x00,  'r'|0x80,  'r'|0x00,  's'|0x00,  
    's'|0x80,  't'|0x00,  'u'|0x00,  'u'|0x80,  'v'|0x00,  'w'|0x00,  
    'w'|0x00,  'x'|0x00,  'y'|0x00,  'z'|0x00,  'h'|0x00,  't'|0x00,  
    'w'|0x00,  'y'|0x00,  'a'|0x00,  'a'|0x80,  'a'|0x80,  'a'|0x80,  
    'e'|0x00,  'e'|0x80,  'e'|0x80,  'i'|0x00,  'o'|0x00,  'o'|0x80,  
    'o'|0x80,  'o'|0x80,  'u'|0x00,  'u'|0x80,  'u'|0x80,  'y'|0x00,  
  };

  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );
  if( bComplex==0 && (aChar[iRes] & 0x80) ) return c;
  return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F);
}


/*
** Return true if the argument interpreted as a unicode codepoint
** is a diacritical modifier character.
*/
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
** Otherwise, return a copy of the argument.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){
  /* Each entry in the following array defines a rule for folding a range
  ** of codepoints to lower case. The rule applies to a range of nRange
  ** codepoints starting at codepoint iCode.
  **
  ** If the least significant bit in flags is clear, then the rule applies
  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
  ** need to be folded). Or, if it is set, then the rule only applies to







|







109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
** Otherwise, return a copy of the argument.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int sqlite3Fts5UnicodeFold(int c, int eRemoveDiacritic){
  /* Each entry in the following array defines a rule for folding a range
  ** of codepoints to lower case. The rule applies to a range of nRange
  ** codepoints starting at codepoint iCode.
  **
  ** If the least significant bit in flags is clear, then the rule applies
  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
  ** need to be folded). Or, if it is set, then the rule only applies to
216
217
218
219
220
221
222

223

224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
    assert( iRes>=0 && c>=aEntry[iRes].iCode );
    p = &aEntry[iRes];
    if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
      ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
      assert( ret>0 );
    }


    if( bRemoveDiacritic ) ret = fts5_remove_diacritic(ret);

  }
  
  else if( c>=66560 && c<66600 ){
    ret = c + 40;
  }

  return ret;
}


#if 0
int sqlite3Fts5UnicodeNCat(void) { 
  return 32;
}
#endif

int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ 
  aArray[0] = 1;
  switch( zCat[0] ){
    case 'C':
          switch( zCat[1] ){
            case 'c': aArray[1] = 1; break;
            case 'f': aArray[2] = 1; break;







>
|
>









<
<
<
<
<
<
<







232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250







251
252
253
254
255
256
257
    assert( iRes>=0 && c>=aEntry[iRes].iCode );
    p = &aEntry[iRes];
    if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
      ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
      assert( ret>0 );
    }

    if( eRemoveDiacritic ){
      ret = fts5_remove_diacritic(ret, eRemoveDiacritic==2);
    }
  }
  
  else if( c>=66560 && c<66600 ){
    ret = c + 40;
  }

  return ret;
}








int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ 
  aArray[0] = 1;
  switch( zCat[0] ){
    case 'C':
          switch( zCat[1] ){
            case 'c': aArray[1] = 1; break;
            case 'f': aArray[2] = 1; break;
752
753
754
755
756
757
758
759
760
761
762
763
764
void sqlite3Fts5UnicodeAscii(u8 *aArray, u8 *aAscii){
  int i = 0;
  int iTbl = 0;
  while( i<128 ){
    int bToken = aArray[ aFts5UnicodeData[iTbl] & 0x1F ];
    int n = (aFts5UnicodeData[iTbl] >> 5) + i;
    for(; i<128 && i<n; i++){
      aAscii[i] = (u8)bToken;
    }
    iTbl++;
  }
}








|




<
763
764
765
766
767
768
769
770
771
772
773
774

void sqlite3Fts5UnicodeAscii(u8 *aArray, u8 *aAscii){
  int i = 0;
  int iTbl = 0;
  while( i<128 ){
    int bToken = aArray[ aFts5UnicodeData[iTbl] & 0x1F ];
    int n = (aFts5UnicodeData[iTbl] >> 5) + i;
    for(; i<128 && i<n; i++){
      aAscii[i] = bToken;
    }
    iTbl++;
  }
}

Changes to ext/fts5/test/fts5tokenizer.test.
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 tokenchars');
} {1 {error in tokenizer constructor}}
do_catchsql_test 6.2 {
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 a b');
} {1 {error in tokenizer constructor}}
do_catchsql_test 6.3 {
  CREATE VIRTUAL TABLE a3 USING fts5(
    x, y, tokenize = 'unicode61 remove_diacritics 2'
  );
} {1 {error in tokenizer constructor}}
do_catchsql_test 6.4 {
  CREATE VIRTUAL TABLE a3 USING fts5(
    x, y, tokenize = 'unicode61 remove_diacritics 10'
  );
} {1 {error in tokenizer constructor}}







|







185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 tokenchars');
} {1 {error in tokenizer constructor}}
do_catchsql_test 6.2 {
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 a b');
} {1 {error in tokenizer constructor}}
do_catchsql_test 6.3 {
  CREATE VIRTUAL TABLE a3 USING fts5(
    x, y, tokenize = 'unicode61 remove_diacritics 3'
  );
} {1 {error in tokenizer constructor}}
do_catchsql_test 6.4 {
  CREATE VIRTUAL TABLE a3 USING fts5(
    x, y, tokenize = 'unicode61 remove_diacritics 10'
  );
} {1 {error in tokenizer constructor}}
Added ext/fts5/test/fts5umlaut.test.


































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS5 module.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5umlaut

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  CREATE VIRTUAL TABLE t2 USING fts5(
      x, 
      tokenize="unicode61 remove_diacritics 2"
  );
}

foreach {tn q res1 res2} {
  1 "Hà Nội"                  0 1
  2 "Hà Noi"                  1 1
  3 "Ha Noi"                  1 1
  4 "Ha N\u1ed9i"             0 1
  5 "Ha N\u006fi"             1 1
  6 "Ha N\u006f\u0302i"       1 1
  7 "Ha N\u006f\u0323\u0302i" 1 1
} {
  do_execsql_test 1.$tn.1 {
    DELETE FROM t1;
    INSERT INTO t1(rowid, x) VALUES (1, 'Ha Noi');
    SELECT count(*) FROM t1($q)
  } $res1
  do_execsql_test 1.$tn.2 {
    DELETE FROM t1;
    INSERT INTO t1(rowid, x) VALUES (1, $q);
    SELECT count(*) FROM t1('Ha Noi')
  } $res1

  do_execsql_test 1.$tn.2 {
    DELETE FROM t2;
    INSERT INTO t2(rowid, x) VALUES (1, 'Ha Noi');
    SELECT count(*) FROM t2($q)
  } $res2
  do_execsql_test 1.$tn.2 {
    DELETE FROM t2;
    INSERT INTO t2(rowid, x) VALUES (1, $q);
    SELECT count(*) FROM t2('Ha Noi')
  } $res2
}

finish_test

Changes to ext/fts5/test/fts5unicode3.test.
32
33
34
35
36
37
38
39
40
41
42
43
44
45

46
47
48
49
50

51
52
53
54
55
56
57
58
59
60
61
62
63

tl_load_casefolding_txt $CF
foreach x [an_load_unicodedata_text $UD] {
  set aNotAlnum($x) 1
}

foreach {y} [rd_load_unicodedata_text $UD] {
  foreach {code ascii} $y {}
  if {$ascii==""} {
    set int 0
  } else {
    binary scan $ascii c int
  }
  set aDiacritic($code) $int

}

proc tcl_fold {i {bRemoveDiacritic 0}} {
  global tl_lookup_table
  global aDiacritic


  if {[info exists tl_lookup_table($i)]} {
    set i $tl_lookup_table($i)
  }
  if {$bRemoveDiacritic && [info exists aDiacritic($i)]} {
    set i $aDiacritic($i)
  }
  expr $i
}
db func tcl_fold tcl_fold

proc tcl_isalnum {i} {
  global aNotAlnum







|





|
>





>




|
|







32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65

tl_load_casefolding_txt $CF
foreach x [an_load_unicodedata_text $UD] {
  set aNotAlnum($x) 1
}

foreach {y} [rd_load_unicodedata_text $UD] {
  foreach {code ascii f} $y {}
  if {$ascii==""} {
    set int 0
  } else {
    binary scan $ascii c int
  }
  set aDiacritic($code,$f) $int
  if {$f==0} { set aDiacritic($code,1) $int }
}

proc tcl_fold {i {bRemoveDiacritic 0}} {
  global tl_lookup_table
  global aDiacritic
  set f [expr $bRemoveDiacritic==2]

  if {[info exists tl_lookup_table($i)]} {
    set i $tl_lookup_table($i)
  }
  if {$bRemoveDiacritic && [info exists aDiacritic($i,$f)]} {
    set i $aDiacritic($i,$f)
  }
  expr $i
}
db func tcl_fold tcl_fold

proc tcl_isalnum {i} {
  global aNotAlnum
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96










97
98
99
100
101
102
103
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii WHERE fts5_fold(i)!=CAST(tcl_fold(i) AS int);
} {0 {}}

do_execsql_test 1.2 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii 
  WHERE fts5_fold(i,1)!=CAST(tcl_fold(i,1) AS int);
} {0 {}}











do_execsql_test 1.3 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )







|








>
>
>
>
>
>
>
>
>
>







83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii WHERE fts5_fold(i)!=CAST(tcl_fold(i) AS int);
} {0 {}}

do_execsql_test 1.2.1 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii 
  WHERE fts5_fold(i,1)!=CAST(tcl_fold(i,1) AS int);
} {0 {}}

do_execsql_test 1.2.2 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii 
  WHERE fts5_fold(i,2)!=CAST(tcl_fold(i,2) AS int);
} {0 {}}

do_execsql_test 1.3 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
Changes to main.mk.
994
995
996
997
998
999
1000




1001
1002
1003
1004
1005
1006
1007

showwal$(EXE):	$(TOP)/tool/showwal.c sqlite3.o
	$(TCC) -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_LOAD_EXTENSION -o showwal$(EXE) \
		$(TOP)/tool/showwal.c sqlite3.o $(THREADLIB)

showshm$(EXE):	$(TOP)/tool/showshm.c
	$(TCC) -o showshm$(EXE) $(TOP)/tool/showshm.c





changeset$(EXE):	$(TOP)/ext/session/changeset.c sqlite3.o
	$(TCC) -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_LOAD_EXTENSION -o changeset$(EXE) \
		$(TOP)/ext/session/changeset.c sqlite3.o $(THREADLIB)

changesetfuzz$(EXE):	$(TOP)/ext/session/changesetfuzz.c sqlite3.o
	$(TCC) -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_LOAD_EXTENSION -o changesetfuzz$(EXE) \







>
>
>
>







994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011

showwal$(EXE):	$(TOP)/tool/showwal.c sqlite3.o
	$(TCC) -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_LOAD_EXTENSION -o showwal$(EXE) \
		$(TOP)/tool/showwal.c sqlite3.o $(THREADLIB)

showshm$(EXE):	$(TOP)/tool/showshm.c
	$(TCC) -o showshm$(EXE) $(TOP)/tool/showshm.c

index_usage$(EXE): $(TOP)/tool/index_usage.c sqlite3.o
	$(TCC) -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_DEPRECATED -o index_usage$(EXE) \
		$(TOP)/tool/index_usage.c sqlite3.o $(THREADLIB)

changeset$(EXE):	$(TOP)/ext/session/changeset.c sqlite3.o
	$(TCC) -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_LOAD_EXTENSION -o changeset$(EXE) \
		$(TOP)/ext/session/changeset.c sqlite3.o $(THREADLIB)

changesetfuzz$(EXE):	$(TOP)/ext/session/changesetfuzz.c sqlite3.o
	$(TCC) -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_LOAD_EXTENSION -o changesetfuzz$(EXE) \
Changes to src/build.c.
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239


  /* Get the VDBE program ready for execution
  */
  if( v && pParse->nErr==0 && !db->mallocFailed ){
    /* A minimum of one cursor is required if autoincrement is used
    *  See ticket [a696379c1f08866] */
    if( pParse->pAinc!=0 && pParse->nTab==0 ) pParse->nTab = 1;
    sqlite3VdbeMakeReady(v, pParse);
    pParse->rc = SQLITE_DONE;
  }else{
    pParse->rc = SQLITE_ERROR;
  }
}








|







225
226
227
228
229
230
231
232
233
234
235
236
237
238
239


  /* Get the VDBE program ready for execution
  */
  if( v && pParse->nErr==0 && !db->mallocFailed ){
    /* A minimum of one cursor is required if autoincrement is used
    *  See ticket [a696379c1f08866] */
    assert( pParse->pAinc==0 || pParse->nTab>0 );
    sqlite3VdbeMakeReady(v, pParse);
    pParse->rc = SQLITE_DONE;
  }else{
    pParse->rc = SQLITE_ERROR;
  }
}

Changes to src/hash.c.
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
static unsigned int strHashN(const char *z, int n){
  unsigned int h = 0;
  int i;
  for(i=0; i<n; i++){
    /* Knuth multiplicative hashing.  (Sorting & Searching, p. 510).
    ** 0x9e3779b1 is 2654435761 which is the closest prime number to
    ** (2**32)*golden_ratio, where golden_ratio = (sqrt(5) - 1)/2. */
    h += sqlite3UpperToLower[z[i]];
    h *= 0x9e3779b1;
  }
  return h;
}
#endif /* SQLITE_ENABLE_NORMALIZE */









|







68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
static unsigned int strHashN(const char *z, int n){
  unsigned int h = 0;
  int i;
  for(i=0; i<n; i++){
    /* Knuth multiplicative hashing.  (Sorting & Searching, p. 510).
    ** 0x9e3779b1 is 2654435761 which is the closest prime number to
    ** (2**32)*golden_ratio, where golden_ratio = (sqrt(5) - 1)/2. */
    h += sqlite3UpperToLower[(unsigned char)z[i]];
    h *= 0x9e3779b1;
  }
  return h;
}
#endif /* SQLITE_ENABLE_NORMALIZE */


Changes to src/hash.h.
64
65
66
67
68
69
70



71
72
73
74
75
76
77

/*
** Access routines.  To delete, insert a NULL pointer.
*/
void sqlite3HashInit(Hash*);
void *sqlite3HashInsert(Hash*, const char *pKey, void *pData);
void *sqlite3HashFind(const Hash*, const char *pKey);



void sqlite3HashClear(Hash*);

/*
** Macros for looping over all elements of a hash table.  The idiom is
** like this:
**
**   Hash h;







>
>
>







64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80

/*
** Access routines.  To delete, insert a NULL pointer.
*/
void sqlite3HashInit(Hash*);
void *sqlite3HashInsert(Hash*, const char *pKey, void *pData);
void *sqlite3HashFind(const Hash*, const char *pKey);
#ifdef SQLITE_ENABLE_NORMALIZE
void *sqlite3HashFindN(const Hash *pH, const char *pKey, int nKey);
#endif
void sqlite3HashClear(Hash*);

/*
** Macros for looping over all elements of a hash table.  The idiom is
** like this:
**
**   Hash h;
Changes to src/insert.c.
315
316
317
318
319
320
321

322
323
324
325
326
327
328
    aOp[3].p5 = SQLITE_JUMPIFNULL;
    aOp[4].p2 = memId+1;
    aOp[5].p3 = memId;
    aOp[6].p1 = memId;
    aOp[7].p2 = memId+2;
    aOp[7].p1 = memId;
    aOp[10].p2 = memId;

  }
}

/*
** Update the maximum rowid for an autoincrement calculation.
**
** This routine should be called when the regRowid register holds a







>







315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
    aOp[3].p5 = SQLITE_JUMPIFNULL;
    aOp[4].p2 = memId+1;
    aOp[5].p3 = memId;
    aOp[6].p1 = memId;
    aOp[7].p2 = memId+2;
    aOp[7].p1 = memId;
    aOp[10].p2 = memId;
    if( pParse->nTab==0 ) pParse->nTab = 1;
  }
}

/*
** Update the maximum rowid for an autoincrement calculation.
**
** This routine should be called when the regRowid register holds a
Changes to src/main.c.
1992
1993
1994
1995
1996
1997
1998

1999
2000
2001
2002
2003
2004
2005
    (void)SQLITE_MISUSE_BKPT;
    return 0;
  }
#endif
  sqlite3_mutex_enter(db->mutex);
  pOld = db->pTraceArg;
  db->mTrace = xTrace ? SQLITE_TRACE_LEGACY : 0;

  db->xTrace = (int(*)(u32,void*,void*,void*))xTrace;
  db->pTraceArg = pArg;
  sqlite3_mutex_leave(db->mutex);
  return pOld;
}
#endif /* SQLITE_OMIT_DEPRECATED */








>







1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
    (void)SQLITE_MISUSE_BKPT;
    return 0;
  }
#endif
  sqlite3_mutex_enter(db->mutex);
  pOld = db->pTraceArg;
  db->mTrace = xTrace ? SQLITE_TRACE_LEGACY : 0;
  if( db->xProfile ) db->mTrace |= SQLITE_TRACE_XPROFILE;
  db->xTrace = (int(*)(u32,void*,void*,void*))xTrace;
  db->pTraceArg = pArg;
  sqlite3_mutex_leave(db->mutex);
  return pOld;
}
#endif /* SQLITE_OMIT_DEPRECATED */

2016
2017
2018
2019
2020
2021
2022



2023
2024
2025
2026
2027
2028
2029
    return SQLITE_MISUSE_BKPT;
  }
#endif
  sqlite3_mutex_enter(db->mutex);
  if( mTrace==0 ) xTrace = 0;
  if( xTrace==0 ) mTrace = 0;
  db->mTrace = mTrace;



  db->xTrace = xTrace;
  db->pTraceArg = pArg;
  sqlite3_mutex_leave(db->mutex);
  return SQLITE_OK;
}

#ifndef SQLITE_OMIT_DEPRECATED







>
>
>







2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
    return SQLITE_MISUSE_BKPT;
  }
#endif
  sqlite3_mutex_enter(db->mutex);
  if( mTrace==0 ) xTrace = 0;
  if( xTrace==0 ) mTrace = 0;
  db->mTrace = mTrace;
#ifndef SQLITE_OMIT_DEPRECATED
  if( db->xProfile ) db->mTrace |= SQLITE_TRACE_XPROFILE;
#endif
  db->xTrace = xTrace;
  db->pTraceArg = pArg;
  sqlite3_mutex_leave(db->mutex);
  return SQLITE_OK;
}

#ifndef SQLITE_OMIT_DEPRECATED
2048
2049
2050
2051
2052
2053
2054


2055
2056
2057
2058
2059
2060
2061
    return 0;
  }
#endif
  sqlite3_mutex_enter(db->mutex);
  pOld = db->pProfileArg;
  db->xProfile = xProfile;
  db->pProfileArg = pArg;


  sqlite3_mutex_leave(db->mutex);
  return pOld;
}
#endif /* SQLITE_OMIT_DEPRECATED */
#endif /* SQLITE_OMIT_TRACE */

/*







>
>







2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
    return 0;
  }
#endif
  sqlite3_mutex_enter(db->mutex);
  pOld = db->pProfileArg;
  db->xProfile = xProfile;
  db->pProfileArg = pArg;
  db->mTrace &= SQLITE_TRACE_NONLEGACY_MASK;
  if( db->xProfile ) db->mTrace |= SQLITE_TRACE_XPROFILE;
  sqlite3_mutex_leave(db->mutex);
  return pOld;
}
#endif /* SQLITE_OMIT_DEPRECATED */
#endif /* SQLITE_OMIT_TRACE */

/*
Changes to src/parse.y.
675
676
677
678
679
680
681






682
683
684
685
686
687
688
      A = sqlite3SrcListAppendFromTerm(pParse,A,0,0,&Z,0,N,U);
      if( A ){
        struct SrcList_item *pNew = &A->a[A->nSrc-1];
        struct SrcList_item *pOld = F->a;
        pNew->zName = pOld->zName;
        pNew->zDatabase = pOld->zDatabase;
        pNew->pSelect = pOld->pSelect;






        pOld->zName = pOld->zDatabase = 0;
        pOld->pSelect = 0;
      }
      sqlite3SrcListDelete(pParse->db, F);
    }else{
      Select *pSubquery;
      sqlite3SrcListShiftJoinType(F);







>
>
>
>
>
>







675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
      A = sqlite3SrcListAppendFromTerm(pParse,A,0,0,&Z,0,N,U);
      if( A ){
        struct SrcList_item *pNew = &A->a[A->nSrc-1];
        struct SrcList_item *pOld = F->a;
        pNew->zName = pOld->zName;
        pNew->zDatabase = pOld->zDatabase;
        pNew->pSelect = pOld->pSelect;
        if( pOld->fg.isTabFunc ){
          pNew->u1.pFuncArg = pOld->u1.pFuncArg;
          pOld->u1.pFuncArg = 0;
          pOld->fg.isTabFunc = 0;
          pNew->fg.isTabFunc = 1;
        }
        pOld->zName = pOld->zDatabase = 0;
        pOld->pSelect = 0;
      }
      sqlite3SrcListDelete(pParse->db, F);
    }else{
      Select *pSubquery;
      sqlite3SrcListShiftJoinType(F);
Changes to src/pcache1.c.
98
99
100
101
102
103
104

105
106
107
108
109
110
111
  unsigned int iKey;             /* Key value (page number) */
  u8 isBulkLocal;                /* This page from bulk local storage */
  u8 isAnchor;                   /* This is the PGroup.lru element */
  PgHdr1 *pNext;                 /* Next in hash table chain */
  PCache1 *pCache;               /* Cache that currently owns this page */
  PgHdr1 *pLruNext;              /* Next in LRU list of unpinned pages */
  PgHdr1 *pLruPrev;              /* Previous in LRU list of unpinned pages */

};

/*
** A page is pinned if it is not on the LRU list.  To be "pinned" means
** that the page is in active use and must not be deallocated.
*/
#define PAGE_IS_PINNED(p)    ((p)->pLruNext==0)







>







98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
  unsigned int iKey;             /* Key value (page number) */
  u8 isBulkLocal;                /* This page from bulk local storage */
  u8 isAnchor;                   /* This is the PGroup.lru element */
  PgHdr1 *pNext;                 /* Next in hash table chain */
  PCache1 *pCache;               /* Cache that currently owns this page */
  PgHdr1 *pLruNext;              /* Next in LRU list of unpinned pages */
  PgHdr1 *pLruPrev;              /* Previous in LRU list of unpinned pages */
                                 /* NB: pLruPrev is only valid if pLruNext!=0 */
};

/*
** A page is pinned if it is not on the LRU list.  To be "pinned" means
** that the page is in active use and must not be deallocated.
*/
#define PAGE_IS_PINNED(p)    ((p)->pLruNext==0)
566
567
568
569
570
571
572
573

574
575
576
577
578
579
580
  assert( PAGE_IS_UNPINNED(pPage) );
  assert( pPage->pLruNext );
  assert( pPage->pLruPrev );
  assert( sqlite3_mutex_held(pPage->pCache->pGroup->mutex) );
  pPage->pLruPrev->pLruNext = pPage->pLruNext;
  pPage->pLruNext->pLruPrev = pPage->pLruPrev;
  pPage->pLruNext = 0;
  pPage->pLruPrev = 0;

  assert( pPage->isAnchor==0 );
  assert( pPage->pCache->pGroup->lru.isAnchor==1 );
  pPage->pCache->nRecyclable--;
  return pPage;
}









|
>







567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
  assert( PAGE_IS_UNPINNED(pPage) );
  assert( pPage->pLruNext );
  assert( pPage->pLruPrev );
  assert( sqlite3_mutex_held(pPage->pCache->pGroup->mutex) );
  pPage->pLruPrev->pLruNext = pPage->pLruNext;
  pPage->pLruNext->pLruPrev = pPage->pLruPrev;
  pPage->pLruNext = 0;
  /* pPage->pLruPrev = 0;
  ** No need to clear pLruPrev as it is never accessed if pLruNext is 0 */
  assert( pPage->isAnchor==0 );
  assert( pPage->pCache->pGroup->lru.isAnchor==1 );
  pPage->pCache->nRecyclable--;
  return pPage;
}


904
905
906
907
908
909
910
911
912

913
914
915
916
917
918
919

  if( pPage ){
    unsigned int h = iKey % pCache->nHash;
    pCache->nPage++;
    pPage->iKey = iKey;
    pPage->pNext = pCache->apHash[h];
    pPage->pCache = pCache;
    pPage->pLruPrev = 0;
    pPage->pLruNext = 0;

    *(void **)pPage->page.pExtra = 0;
    pCache->apHash[h] = pPage;
    if( iKey>pCache->iMaxKey ){
      pCache->iMaxKey = iKey;
    }
  }
  return pPage;







|
|
>







906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922

  if( pPage ){
    unsigned int h = iKey % pCache->nHash;
    pCache->nPage++;
    pPage->iKey = iKey;
    pPage->pNext = pCache->apHash[h];
    pPage->pCache = pCache;
    pPage->pLruNext = 0;
    /* pPage->pLruPrev = 0;
    ** No need to clear pLruPrev since it is not accessed when pLruNext==0 */
    *(void **)pPage->page.pExtra = 0;
    pCache->apHash[h] = pPage;
    if( iKey>pCache->iMaxKey ){
      pCache->iMaxKey = iKey;
    }
  }
  return pPage;
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
 
  assert( pPage->pCache==pCache );
  pcache1EnterMutex(pGroup);

  /* It is an error to call this function if the page is already 
  ** part of the PGroup LRU list.
  */
  assert( pPage->pLruPrev==0 && pPage->pLruNext==0 );
  assert( PAGE_IS_PINNED(pPage) );

  if( reuseUnlikely || pGroup->nPurgeable>pGroup->nMaxPage ){
    pcache1RemoveFromHash(pPage, 1);
  }else{
    /* Add the page to the PGroup LRU list. */
    PgHdr1 **ppFirst = &pGroup->lru.pLruNext;







|







1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
 
  assert( pPage->pCache==pCache );
  pcache1EnterMutex(pGroup);

  /* It is an error to call this function if the page is already 
  ** part of the PGroup LRU list.
  */
  assert( pPage->pLruNext==0 );
  assert( PAGE_IS_PINNED(pPage) );

  if( reuseUnlikely || pGroup->nPurgeable>pGroup->nMaxPage ){
    pcache1RemoveFromHash(pPage, 1);
  }else{
    /* Add the page to the PGroup LRU list. */
    PgHdr1 **ppFirst = &pGroup->lru.pLruNext;
Changes to src/prepare.c.
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
** normalized version of the specified SQL string.  This should take into
** account any potential expansion that could occur (e.g. via IN clauses
** being expanded, etc).  This size returned is the total number of bytes
** including the NUL terminator.
*/
static int estimateNormalizedSize(
  const char *zSql, /* The original SQL string */
  int nSql,         /* Length of original SQL string */
  u8 prepFlags      /* The flags passed to sqlite3_prepare_v3() */
){
  int nOut = nSql + 4;
  const char *z = zSql;
  while( nOut<nSql*5 ){
    while( z[0]!=0 && z[0]!='I' && z[0]!='i' ){ z++; }
    if( z[0]==0 ) break;
    z++;







|
<







787
788
789
790
791
792
793
794

795
796
797
798
799
800
801
** normalized version of the specified SQL string.  This should take into
** account any potential expansion that could occur (e.g. via IN clauses
** being expanded, etc).  This size returned is the total number of bytes
** including the NUL terminator.
*/
static int estimateNormalizedSize(
  const char *zSql, /* The original SQL string */
  int nSql          /* Length of original SQL string */

){
  int nOut = nSql + 4;
  const char *z = zSql;
  while( nOut<nSql*5 ){
    while( z[0]!=0 && z[0]!='I' && z[0]!='i' ){ z++; }
    if( z[0]==0 ) break;
    z++;
843
844
845
846
847
848
849
850
851
852
853
854
855

856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
      zOut[j++] = sqlite3Tolower(zSql[iIn+k]);
    }
  }
  *piOut = j;
}

/*
** Perform normalization of the SQL contained in the prepared statement and
** store the result in the zNormSql field.  The schema for the associated
** databases are consulted while performing the normalization in order to
** determine if a token appears to be an identifier.  All identifiers are
** left intact in the normalized SQL and all literals are replaced with a
** single '?'.

*/
void sqlite3Normalize(
  Vdbe *pVdbe,      /* VM being reprepared */
  const char *zSql, /* The original SQL string */
  int nSql,         /* Size of the input string in bytes */
  u8 prepFlags      /* The flags passed to sqlite3_prepare_v3() */
){
  sqlite3 *db;           /* Database handle. */
  char *z;               /* The output string */
  int nZ;                /* Size of the output string in bytes */
  int i;                 /* Next character to read from zSql[] */
  int j;                 /* Next character to fill in on z[] */
  int tokenType = 0;     /* Type of the next token */
  int prevTokenType = 0; /* Type of the previous token, except spaces */
  int n;                 /* Size of the next token */
  int nParen = 0;        /* Nesting level of parenthesis */
  Hash inHash;           /* Table of parenthesis levels to output index. */

  db = sqlite3VdbeDb(pVdbe);
  assert( db!=0 );
  assert( pVdbe->zNormSql==0 );
  if( zSql==0 ) return;
  nZ = estimateNormalizedSize(zSql, nSql, prepFlags);
  z = sqlite3DbMallocRawNN(db, nZ);
  if( z==0 ) return;
  sqlite3HashInit(&inHash);
  for(i=j=0; i<nSql && zSql[i]; i+=n){
    int flags = 0;
    if( tokenType!=TK_SPACE ) prevTokenType = tokenType;
    n = sqlite3GetTokenNormalized((unsigned char*)zSql+i, &tokenType, &flags);
    switch( tokenType ){
      case TK_SPACE: {
        break;
      }
      case TK_ILLEGAL: {
        sqlite3DbFree(db, z);
        sqlite3HashClear(&inHash);
        return;
      }
      case TK_STRING:
      case TK_INTEGER:
      case TK_FLOAT:
      case TK_VARIABLE:
      case TK_BLOB: {
        z[j++] = '?';







|
<
|
<
<
<
>

|


|
<














<
|
|

|










|
<
<







842
843
844
845
846
847
848
849

850



851
852
853
854
855
856

857
858
859
860
861
862
863
864
865
866
867
868
869
870

871
872
873
874
875
876
877
878
879
880
881
882
883
884
885


886
887
888
889
890
891
892
      zOut[j++] = sqlite3Tolower(zSql[iIn+k]);
    }
  }
  *piOut = j;
}

/*
** Compute a normalization of the SQL given by zSql[0..nSql-1].  Return

** the normalization in space obtained from sqlite3DbMalloc().  Or return



** NULL if anything goes wrong or if zSql is NULL.
*/
char *sqlite3Normalize(
  Vdbe *pVdbe,      /* VM being reprepared */
  const char *zSql, /* The original SQL string */
  int nSql          /* Size of the input string in bytes */

){
  sqlite3 *db;           /* Database handle. */
  char *z;               /* The output string */
  int nZ;                /* Size of the output string in bytes */
  int i;                 /* Next character to read from zSql[] */
  int j;                 /* Next character to fill in on z[] */
  int tokenType = 0;     /* Type of the next token */
  int prevTokenType = 0; /* Type of the previous token, except spaces */
  int n;                 /* Size of the next token */
  int nParen = 0;        /* Nesting level of parenthesis */
  Hash inHash;           /* Table of parenthesis levels to output index. */

  db = sqlite3VdbeDb(pVdbe);
  assert( db!=0 );

  if( zSql==0 ) return 0;
  nZ = estimateNormalizedSize(zSql, nSql);
  z = sqlite3DbMallocRawNN(db, nZ);
  if( z==0 ) goto normalizeError;
  sqlite3HashInit(&inHash);
  for(i=j=0; i<nSql && zSql[i]; i+=n){
    int flags = 0;
    if( tokenType!=TK_SPACE ) prevTokenType = tokenType;
    n = sqlite3GetTokenNormalized((unsigned char*)zSql+i, &tokenType, &flags);
    switch( tokenType ){
      case TK_SPACE: {
        break;
      }
      case TK_ILLEGAL: {
        goto normalizeError;


      }
      case TK_STRING:
      case TK_INTEGER:
      case TK_FLOAT:
      case TK_VARIABLE:
      case TK_BLOB: {
        z[j++] = '?';
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994


995


996

997
998
999
1000
1001
1002
1003
          int i2 = i, n2 = n, rc = SQLITE_OK;
          if( nParen>0 ){
            assert( nParen<nSql );
            sqlite3HashInsert(&inHash, zSql+nParen, 0);
          }
          if( flags&SQLITE_TOKEN_QUOTED ){ i2++; n2-=2; }
          if( shouldTreatAsIdentifier(db, zSql+i2, n2, &rc)==0 ){
            if( rc!=SQLITE_OK ){
              sqlite3DbFree(db, z);
              sqlite3HashClear(&inHash);
              return;
            }
            if( sqlite3_keyword_check(zSql+i2, n2)==0 ){
              z[j++] = '?';
              break;
            }
          }
        }
        copyNormalizedToken(zSql, i, n, flags, z, &j);
        break;
      }
    }
  }
  assert( j<nZ && "one" );
  while( j>0 && z[j-1]==' ' ){ j--; }
  if( j>0 && z[j-1]!=';' ){ z[j++] = ';'; }
  z[j] = 0;
  assert( j<nZ && "two" );


  pVdbe->zNormSql = z;


  sqlite3HashClear(&inHash);

}
#endif /* SQLITE_ENABLE_NORMALIZE */

/*
** Rerun the compilation of a statement after a schema change.
**
** If the statement is successfully recompiled, return SQLITE_OK. Otherwise,







|
<
<
<
<
















>
>
|
>
>

>







959
960
961
962
963
964
965
966




967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
          int i2 = i, n2 = n, rc = SQLITE_OK;
          if( nParen>0 ){
            assert( nParen<nSql );
            sqlite3HashInsert(&inHash, zSql+nParen, 0);
          }
          if( flags&SQLITE_TOKEN_QUOTED ){ i2++; n2-=2; }
          if( shouldTreatAsIdentifier(db, zSql+i2, n2, &rc)==0 ){
            if( rc!=SQLITE_OK ) goto normalizeError;




            if( sqlite3_keyword_check(zSql+i2, n2)==0 ){
              z[j++] = '?';
              break;
            }
          }
        }
        copyNormalizedToken(zSql, i, n, flags, z, &j);
        break;
      }
    }
  }
  assert( j<nZ && "one" );
  while( j>0 && z[j-1]==' ' ){ j--; }
  if( j>0 && z[j-1]!=';' ){ z[j++] = ';'; }
  z[j] = 0;
  assert( j<nZ && "two" );
  sqlite3HashClear(&inHash);
  return z;

normalizeError:
  sqlite3DbFree(db, z);
  sqlite3HashClear(&inHash);
  return 0;
}
#endif /* SQLITE_ENABLE_NORMALIZE */

/*
** Rerun the compilation of a statement after a schema change.
**
** If the statement is successfully recompiled, return SQLITE_OK. Otherwise,
Changes to src/resolve.c.
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
  db = pParse->db;
  pDup = sqlite3ExprDup(db, pOrig, 0);
  if( pDup!=0 ){
    if( zType[0]!='G' ) incrAggFunctionDepth(pDup, nSubquery);
    if( pExpr->op==TK_COLLATE ){
      pDup = sqlite3ExprAddCollateString(pParse, pDup, pExpr->u.zToken);
    }
    ExprSetProperty(pDup, EP_Alias);

    /* Before calling sqlite3ExprDelete(), set the EP_Static flag. This 
    ** prevents ExprDelete() from deleting the Expr structure itself,
    ** allowing it to be repopulated by the memcpy() on the following line.
    ** The pExpr->u.zToken might point into memory that will be freed by the
    ** sqlite3DbFree(db, pDup) on the last line of this block, so be sure to
    ** make a copy of the token before doing the sqlite3DbFree().







|







76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
  db = pParse->db;
  pDup = sqlite3ExprDup(db, pOrig, 0);
  if( pDup!=0 ){
    if( zType[0]!='G' ) incrAggFunctionDepth(pDup, nSubquery);
    if( pExpr->op==TK_COLLATE ){
      pDup = sqlite3ExprAddCollateString(pParse, pDup, pExpr->u.zToken);
    }
//    ExprSetProperty(pDup, EP_Alias);

    /* Before calling sqlite3ExprDelete(), set the EP_Static flag. This 
    ** prevents ExprDelete() from deleting the Expr structure itself,
    ** allowing it to be repopulated by the memcpy() on the following line.
    ** The pExpr->u.zToken might point into memory that will be freed by the
    ** sqlite3DbFree(db, pDup) on the last line of this block, so be sure to
    ** make a copy of the token before doing the sqlite3DbFree().
Changes to src/shell.c.in.
1003
1004
1005
1006
1007
1008
1009

1010
1011
1012
1013
1014
1015
1016
  u8 autoEQP;            /* Run EXPLAIN QUERY PLAN prior to seach SQL stmt */
  u8 autoEQPtest;        /* autoEQP is in test mode */
  u8 statsOn;            /* True to display memory stats before each finalize */
  u8 scanstatsOn;        /* True to display scan stats before each finalize */
  u8 openMode;           /* SHELL_OPEN_NORMAL, _APPENDVFS, or _ZIPFILE */
  u8 doXdgOpen;          /* Invoke start/open/xdg-open in output_reset() */
  u8 nEqpLevel;          /* Depth of the EQP output graph */

  unsigned mEqpLines;    /* Mask of veritical lines in the EQP output graph */
  int outCount;          /* Revert to stdout when reaching zero */
  int cnt;               /* Number of records displayed so far */
  FILE *out;             /* Write results here */
  FILE *traceOut;        /* Output for sqlite3_trace() */
  int nErr;              /* Number of errors seen */
  int mode;              /* An output mode setting */







>







1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
  u8 autoEQP;            /* Run EXPLAIN QUERY PLAN prior to seach SQL stmt */
  u8 autoEQPtest;        /* autoEQP is in test mode */
  u8 statsOn;            /* True to display memory stats before each finalize */
  u8 scanstatsOn;        /* True to display scan stats before each finalize */
  u8 openMode;           /* SHELL_OPEN_NORMAL, _APPENDVFS, or _ZIPFILE */
  u8 doXdgOpen;          /* Invoke start/open/xdg-open in output_reset() */
  u8 nEqpLevel;          /* Depth of the EQP output graph */
  u8 eTraceType;         /* SHELL_TRACE_* value for type of trace */
  unsigned mEqpLines;    /* Mask of veritical lines in the EQP output graph */
  int outCount;          /* Revert to stdout when reaching zero */
  int cnt;               /* Number of records displayed so far */
  FILE *out;             /* Write results here */
  FILE *traceOut;        /* Output for sqlite3_trace() */
  int nErr;              /* Number of errors seen */
  int mode;              /* An output mode setting */
1062
1063
1064
1065
1066
1067
1068






1069
1070
1071
1072
1073
1074
1075
#define SHELL_OPEN_UNSPEC      0      /* No open-mode specified */
#define SHELL_OPEN_NORMAL      1      /* Normal database file */
#define SHELL_OPEN_APPENDVFS   2      /* Use appendvfs */
#define SHELL_OPEN_ZIPFILE     3      /* Use the zipfile virtual table */
#define SHELL_OPEN_READONLY    4      /* Open a normal database read-only */
#define SHELL_OPEN_DESERIALIZE 5      /* Open using sqlite3_deserialize() */







/*
** These are the allowed shellFlgs values
*/
#define SHFLG_Pagecache      0x00000001 /* The --pagecache option is used */
#define SHFLG_Lookaside      0x00000002 /* Lookaside memory is used */
#define SHFLG_Backslash      0x00000004 /* The --backslash option is used */
#define SHFLG_PreserveRowid  0x00000008 /* .dump preserves rowid values */







>
>
>
>
>
>







1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
#define SHELL_OPEN_UNSPEC      0      /* No open-mode specified */
#define SHELL_OPEN_NORMAL      1      /* Normal database file */
#define SHELL_OPEN_APPENDVFS   2      /* Use appendvfs */
#define SHELL_OPEN_ZIPFILE     3      /* Use the zipfile virtual table */
#define SHELL_OPEN_READONLY    4      /* Open a normal database read-only */
#define SHELL_OPEN_DESERIALIZE 5      /* Open using sqlite3_deserialize() */

/* Allowed values for ShellState.eTraceType
*/
#define SHELL_TRACE_PLAIN      0      /* Show input SQL text */
#define SHELL_TRACE_EXPANDED   1      /* Show expanded SQL text */
#define SHELL_TRACE_NORMALIZED 2      /* Show normalized SQL text */

/*
** These are the allowed shellFlgs values
*/
#define SHFLG_Pagecache      0x00000001 /* The --pagecache option is used */
#define SHFLG_Lookaside      0x00000002 /* Lookaside memory is used */
#define SHFLG_Backslash      0x00000004 /* The --backslash option is used */
#define SHFLG_PreserveRowid  0x00000008 /* .dump preserves rowid values */
3488
3489
3490
3491
3492
3493
3494

3495














3496
3497
3498
3499
3500
3501
3502
#ifndef SQLITE_NOHAVE_SYSTEM
  ".system CMD ARGS...      Run CMD ARGS... in a system shell",
#endif
  ".tables ?TABLE?          List names of tables matching LIKE pattern TABLE",
  ".testcase NAME           Begin redirecting output to 'testcase-out.txt'",
  ".timeout MS              Try opening locked tables for MS milliseconds",
  ".timer on|off            Turn SQL timer on or off",

  ".trace FILE|off          Output each SQL statement as it is run",














  ".vfsinfo ?AUX?           Information about the top-level VFS",
  ".vfslist                 List all available VFSes",
  ".vfsname ?AUX?           Print the name of the VFS stack",
  ".width NUM1 NUM2 ...     Set column widths for \"column\" mode",
  "     Negative values right-justify",
};








>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
#ifndef SQLITE_NOHAVE_SYSTEM
  ".system CMD ARGS...      Run CMD ARGS... in a system shell",
#endif
  ".tables ?TABLE?          List names of tables matching LIKE pattern TABLE",
  ".testcase NAME           Begin redirecting output to 'testcase-out.txt'",
  ".timeout MS              Try opening locked tables for MS milliseconds",
  ".timer on|off            Turn SQL timer on or off",
#ifndef SQLITE_OMIT_TRACE
  ".trace ?OPTIONS?         Output each SQL statement as it is run",
  "    FILE                    Send output to FILE",
  "    stdout                  Send output to stdout",
  "    stderr                  Send output to stderr",
  "    off                     Disable tracing",
  "    --expanded              Expand query parameters",
#ifdef SQLITE_ENABLE_NORMALIZE
  "    --normalized            Normal the SQL statements",
#endif
  "    --plain                 Show SQL as it is input",
  "    --stmt                  Trace statement execution (SQLITE_TRACE_STMT)",
  "    --profile               Profile statements (SQLITE_TRACE_PROFILE)",
  "    --row                   Trace each row (SQLITE_TRACE_ROW)",
  "    --close                 Trace connection close (SQLITE_TRACE_CLOSE)",
#endif /* SQLITE_OMIT_TRACE */
  ".vfsinfo ?AUX?           Information about the top-level VFS",
  ".vfslist                 List all available VFSes",
  ".vfsname ?AUX?           Print the name of the VFS stack",
  ".width NUM1 NUM2 ...     Set column widths for \"column\" mode",
  "     Negative values right-justify",
};

3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014

4015





4016




















4017
4018



4019







4020
4021
4022
4023
4024
4025
4026
    if( f==0 ){
      utf8_printf(stderr, "Error: cannot open \"%s\"\n", zFile);
    }
  }
  return f;
}

#if !defined(SQLITE_OMIT_TRACE) && !defined(SQLITE_OMIT_FLOATING_POINT)
/*
** A routine for handling output from sqlite3_trace().
*/
static int sql_trace_callback(
  unsigned mType,
  void *pArg,
  void *pP,
  void *pX
){
  FILE *f = (FILE*)pArg;
  UNUSED_PARAMETER(mType);
  UNUSED_PARAMETER(pP);

  if( f ){





    const char *z = (const char*)pX;




















    int i = strlen30(z);
    while( i>0 && z[i-1]==';' ){ i--; }



    utf8_printf(f, "%.*s;\n", i, z);







  }
  return 0;
}
#endif

/*
** A no-op routine that runs with the ".breakpoint" doc-command.  This is







|




|
|
|
|

|
|
|
>
|
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
>
>
>
|
>
>
>
>
>
>
>







4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
    if( f==0 ){
      utf8_printf(stderr, "Error: cannot open \"%s\"\n", zFile);
    }
  }
  return f;
}

#ifndef SQLITE_OMIT_TRACE
/*
** A routine for handling output from sqlite3_trace().
*/
static int sql_trace_callback(
  unsigned mType,         /* The trace type */
  void *pArg,             /* The ShellState pointer */
  void *pP,               /* Usually a pointer to sqlite_stmt */
  void *pX                /* Auxiliary output */
){
  ShellState *p = (ShellState*)pArg;
  sqlite3_stmt *pStmt;
  const char *zSql;
  int nSql;
  if( p->traceOut==0 ) return 0;
  if( mType==SQLITE_TRACE_CLOSE ){
    utf8_printf(p->traceOut, "-- closing database connection\n");
    return 0;
  }
  if( mType!=SQLITE_TRACE_ROW && ((const char*)pX)[0]=='-' ){
    zSql = (const char*)pX;
  }else{
    pStmt = (sqlite3_stmt*)pP;
    switch( p->eTraceType ){
      case SHELL_TRACE_EXPANDED: {
        zSql = sqlite3_expanded_sql(pStmt);
        break;
      }
#ifdef SQLITE_ENABLE_NORMALIZE
      case SHELL_TRACE_NORMALIZED: {
        zSql = sqlite3_normalized_sql(pStmt);
        break;
      }
#endif
      default: {
        zSql = sqlite3_sql(pStmt);
        break;
      }
    }
  }
  if( zSql==0 ) return 0;
  nSql = strlen30(zSql);
  while( nSql>0 && zSql[nSql-1]==';' ){ nSql--; }
  switch( mType ){
    case SQLITE_TRACE_ROW:
    case SQLITE_TRACE_STMT: {
      utf8_printf(p->traceOut, "%.*s;\n", nSql, zSql);
      break;
    }
    case SQLITE_TRACE_PROFILE: {
      sqlite3_int64 nNanosec = *(sqlite3_int64*)pX;
      utf8_printf(p->traceOut, "%.*s; -- %lld ns\n", nSql, zSql, nNanosec);
      break;
    }
  }
  return 0;
}
#endif

/*
** A no-op routine that runs with the ".breakpoint" doc-command.  This is
7834
7835
7836
7837
7838
7839
7840

7841


7842


7843
























7844
7845
7846
7847

7848
7849
7850


7851
7852
7853

7854
7855
7856
7857

7858
7859
7860
7861
7862
7863
7864
      }
    }else{
      raw_printf(stderr, "Usage: .timer on|off\n");
      rc = 1;
    }
  }else


  if( c=='t' && strncmp(azArg[0], "trace", n)==0 ){


    open_db(p, 0);


    if( nArg!=2 ){
























      raw_printf(stderr, "Usage: .trace FILE|off\n");
      rc = 1;
      goto meta_command_exit;
    }

    output_file_close(p->traceOut);
    p->traceOut = output_file_open(azArg[1], 0);
#if !defined(SQLITE_OMIT_TRACE) && !defined(SQLITE_OMIT_FLOATING_POINT)


    if( p->traceOut==0 ){
      sqlite3_trace_v2(p->db, 0, 0, 0);
    }else{

      sqlite3_trace_v2(p->db, SQLITE_TRACE_STMT, sql_trace_callback,p->traceOut);
    }
#endif
  }else


#if SQLITE_USER_AUTHENTICATION
  if( c=='u' && strncmp(azArg[0], "user", n)==0 ){
    if( nArg<2 ){
      raw_printf(stderr, "Usage: .user SUBCOMMAND ...\n");
      rc = 1;
      goto meta_command_exit;







>

>
>

>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
|
>
|
|
<
>
>



>
|

<

>







7892
7893
7894
7895
7896
7897
7898
7899
7900
7901
7902
7903
7904
7905
7906
7907
7908
7909
7910
7911
7912
7913
7914
7915
7916
7917
7918
7919
7920
7921
7922
7923
7924
7925
7926
7927
7928
7929
7930
7931
7932
7933
7934
7935
7936
7937

7938
7939
7940
7941
7942
7943
7944
7945

7946
7947
7948
7949
7950
7951
7952
7953
7954
      }
    }else{
      raw_printf(stderr, "Usage: .timer on|off\n");
      rc = 1;
    }
  }else

#ifndef SQLITE_OMIT_TRACE
  if( c=='t' && strncmp(azArg[0], "trace", n)==0 ){
    int mType = 0;
    int jj;
    open_db(p, 0);
    for(jj=1; jj<nArg; jj++){
      const char *z = azArg[jj];
      if( z[0]=='-' ){
        if( optionMatch(z, "expanded") ){
          p->eTraceType = SHELL_TRACE_EXPANDED;
        }
#ifdef SQLITE_ENABLE_NORMALIZE
        else if( optionMatch(z, "normalized") ){
          p->eTraceType = SHELL_TRACE_NORMALIZED;
        }
#endif
        else if( optionMatch(z, "plain") ){
          p->eTraceType = SHELL_TRACE_PLAIN;
        }
        else if( optionMatch(z, "profile") ){
          mType |= SQLITE_TRACE_PROFILE;
        }
        else if( optionMatch(z, "row") ){
          mType |= SQLITE_TRACE_ROW;
        }
        else if( optionMatch(z, "stmt") ){
          mType |= SQLITE_TRACE_STMT;
        }
        else if( optionMatch(z, "close") ){
          mType |= SQLITE_TRACE_CLOSE;
        }
        else {
          raw_printf(stderr, "Unknown option \"%s\" on \".trace\"\n", z);
          rc = 1;
          goto meta_command_exit;
        }
      }else{
        output_file_close(p->traceOut);
        p->traceOut = output_file_open(azArg[1], 0);

      }
    }
    if( p->traceOut==0 ){
      sqlite3_trace_v2(p->db, 0, 0, 0);
    }else{
      if( mType==0 ) mType = SQLITE_TRACE_STMT;
      sqlite3_trace_v2(p->db, mType, sql_trace_callback, p);
    }

  }else
#endif /* !defined(SQLITE_OMIT_TRACE) */

#if SQLITE_USER_AUTHENTICATION
  if( c=='u' && strncmp(azArg[0], "user", n)==0 ){
    if( nArg<2 ){
      raw_printf(stderr, "Usage: .user SUBCOMMAND ...\n");
      rc = 1;
      goto meta_command_exit;
Changes to src/sqliteInt.h.
1352
1353
1354
1355
1356
1357
1358
1359

1360
1361

1362

1363
1364
1365
1366
1367
1368
1369
                               const char*);
#endif

#ifndef SQLITE_OMIT_DEPRECATED
/* This is an extra SQLITE_TRACE macro that indicates "legacy" tracing
** in the style of sqlite3_trace()
*/
#define SQLITE_TRACE_LEGACY  0x80

#else
#define SQLITE_TRACE_LEGACY  0

#endif /* SQLITE_OMIT_DEPRECATED */



/*
** Each database connection is an instance of the following structure.
*/
struct sqlite3 {
  sqlite3_vfs *pVfs;            /* OS Interface */







|
>

|
>

>







1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
                               const char*);
#endif

#ifndef SQLITE_OMIT_DEPRECATED
/* This is an extra SQLITE_TRACE macro that indicates "legacy" tracing
** in the style of sqlite3_trace()
*/
#define SQLITE_TRACE_LEGACY          0x40     /* Use the legacy xTrace */
#define SQLITE_TRACE_XPROFILE        0x80     /* Use the legacy xProfile */
#else
#define SQLITE_TRACE_LEGACY          0
#define SQLITE_TRACE_XPROFILE        0
#endif /* SQLITE_OMIT_DEPRECATED */
#define SQLITE_TRACE_NONLEGACY_MASK  0x0f     /* Normal flags */


/*
** Each database connection is an instance of the following structure.
*/
struct sqlite3 {
  sqlite3_vfs *pVfs;            /* OS Interface */
1415
1416
1417
1418
1419
1420
1421

1422
1423

1424
1425
1426
1427
1428
1429
1430
  int nVdbeWrite;               /* Number of active VDBEs that read and write */
  int nVdbeExec;                /* Number of nested calls to VdbeExec() */
  int nVDestroy;                /* Number of active OP_VDestroy operations */
  int nExtension;               /* Number of loaded extensions */
  void **aExtension;            /* Array of shared library handles */
  int (*xTrace)(u32,void*,void*,void*);     /* Trace function */
  void *pTraceArg;                          /* Argument to the trace function */

  void (*xProfile)(void*,const char*,u64);  /* Profiling function */
  void *pProfileArg;                        /* Argument to profile function */

  void *pCommitArg;                 /* Argument to xCommitCallback() */
  int (*xCommitCallback)(void*);    /* Invoked at every commit. */
  void *pRollbackArg;               /* Argument to xRollbackCallback() */
  void (*xRollbackCallback)(void*); /* Invoked at every commit. */
  void *pUpdateArg;
  void (*xUpdateCallback)(void*,int, const char*,const char*,sqlite_int64);
#ifdef SQLITE_ENABLE_PREUPDATE_HOOK







>


>







1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
  int nVdbeWrite;               /* Number of active VDBEs that read and write */
  int nVdbeExec;                /* Number of nested calls to VdbeExec() */
  int nVDestroy;                /* Number of active OP_VDestroy operations */
  int nExtension;               /* Number of loaded extensions */
  void **aExtension;            /* Array of shared library handles */
  int (*xTrace)(u32,void*,void*,void*);     /* Trace function */
  void *pTraceArg;                          /* Argument to the trace function */
#ifndef SQLITE_OMIT_DEPRECATED
  void (*xProfile)(void*,const char*,u64);  /* Profiling function */
  void *pProfileArg;                        /* Argument to profile function */
#endif
  void *pCommitArg;                 /* Argument to xCommitCallback() */
  int (*xCommitCallback)(void*);    /* Invoked at every commit. */
  void *pRollbackArg;               /* Argument to xRollbackCallback() */
  void (*xRollbackCallback)(void*); /* Invoked at every commit. */
  void *pUpdateArg;
  void (*xUpdateCallback)(void*,int, const char*,const char*,sqlite_int64);
#ifdef SQLITE_ENABLE_PREUPDATE_HOOK
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425
4426
4427
4428
4429
4430
int sqlite3VtabBegin(sqlite3 *, VTable *);
FuncDef *sqlite3VtabOverloadFunction(sqlite3 *,FuncDef*, int nArg, Expr*);
sqlite3_int64 sqlite3StmtCurrentTime(sqlite3_context*);
int sqlite3VdbeParameterIndex(Vdbe*, const char*, int);
int sqlite3TransferBindings(sqlite3_stmt *, sqlite3_stmt *);
void sqlite3ParserReset(Parse*);
#ifdef SQLITE_ENABLE_NORMALIZE
void sqlite3Normalize(Vdbe*, const char*, int, u8);
#endif
int sqlite3Reprepare(Vdbe*);
void sqlite3ExprListCheckLength(Parse*, ExprList*, const char*);
CollSeq *sqlite3BinaryCompareCollSeq(Parse *, Expr *, Expr *);
int sqlite3TempInMemory(const sqlite3*);
const char *sqlite3JournalModename(int);
#ifndef SQLITE_OMIT_WAL







|







4421
4422
4423
4424
4425
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435
int sqlite3VtabBegin(sqlite3 *, VTable *);
FuncDef *sqlite3VtabOverloadFunction(sqlite3 *,FuncDef*, int nArg, Expr*);
sqlite3_int64 sqlite3StmtCurrentTime(sqlite3_context*);
int sqlite3VdbeParameterIndex(Vdbe*, const char*, int);
int sqlite3TransferBindings(sqlite3_stmt *, sqlite3_stmt *);
void sqlite3ParserReset(Parse*);
#ifdef SQLITE_ENABLE_NORMALIZE
char *sqlite3Normalize(Vdbe*, const char*, int);
#endif
int sqlite3Reprepare(Vdbe*);
void sqlite3ExprListCheckLength(Parse*, ExprList*, const char*);
CollSeq *sqlite3BinaryCompareCollSeq(Parse *, Expr *, Expr *);
int sqlite3TempInMemory(const sqlite3*);
const char *sqlite3JournalModename(int);
#ifndef SQLITE_OMIT_WAL
Changes to src/vdbeapi.c.
58
59
60
61
62
63
64
65
66
67
68
69

70
71
72

73
74
75
76
77
78
79
** Invoke the profile callback.  This routine is only called if we already
** know that the profile callback is defined and needs to be invoked.
*/
static SQLITE_NOINLINE void invokeProfileCallback(sqlite3 *db, Vdbe *p){
  sqlite3_int64 iNow;
  sqlite3_int64 iElapse;
  assert( p->startTime>0 );
  assert( db->xProfile!=0 || (db->mTrace & SQLITE_TRACE_PROFILE)!=0 );
  assert( db->init.busy==0 );
  assert( p->zSql!=0 );
  sqlite3OsCurrentTimeInt64(db->pVfs, &iNow);
  iElapse = (iNow - p->startTime)*1000000;

  if( db->xProfile ){
    db->xProfile(db->pProfileArg, p->zSql, iElapse);
  }

  if( db->mTrace & SQLITE_TRACE_PROFILE ){
    db->xTrace(SQLITE_TRACE_PROFILE, db->pTraceArg, p, (void*)&iElapse);
  }
  p->startTime = 0;
}
/*
** The checkProfileCallback(DB,P) macro checks to see if a profile callback







|




>



>







58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
** Invoke the profile callback.  This routine is only called if we already
** know that the profile callback is defined and needs to be invoked.
*/
static SQLITE_NOINLINE void invokeProfileCallback(sqlite3 *db, Vdbe *p){
  sqlite3_int64 iNow;
  sqlite3_int64 iElapse;
  assert( p->startTime>0 );
  assert( (db->mTrace & (SQLITE_TRACE_PROFILE|SQLITE_TRACE_XPROFILE))!=0 );
  assert( db->init.busy==0 );
  assert( p->zSql!=0 );
  sqlite3OsCurrentTimeInt64(db->pVfs, &iNow);
  iElapse = (iNow - p->startTime)*1000000;
#ifndef SQLITE_OMIT_DEPRECATED  	
  if( db->xProfile ){
    db->xProfile(db->pProfileArg, p->zSql, iElapse);
  }
#endif
  if( db->mTrace & SQLITE_TRACE_PROFILE ){
    db->xTrace(SQLITE_TRACE_PROFILE, db->pTraceArg, p, (void*)&iElapse);
  }
  p->startTime = 0;
}
/*
** The checkProfileCallback(DB,P) macro checks to see if a profile callback
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
    }

    assert( db->nVdbeWrite>0 || db->autoCommit==0 
        || (db->nDeferredCons==0 && db->nDeferredImmCons==0)
    );

#ifndef SQLITE_OMIT_TRACE
    if( (db->xProfile || (db->mTrace & SQLITE_TRACE_PROFILE)!=0)
        && !db->init.busy && p->zSql ){
      sqlite3OsCurrentTimeInt64(db->pVfs, &p->startTime);
    }else{
      assert( p->startTime==0 );
    }
#endif








|







600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
    }

    assert( db->nVdbeWrite>0 || db->autoCommit==0 
        || (db->nDeferredCons==0 && db->nDeferredImmCons==0)
    );

#ifndef SQLITE_OMIT_TRACE
    if( (db->mTrace & (SQLITE_TRACE_PROFILE|SQLITE_TRACE_XPROFILE))!=0
        && !db->init.busy && p->zSql ){
      sqlite3OsCurrentTimeInt64(db->pVfs, &p->startTime);
    }else{
      assert( p->startTime==0 );
    }
#endif

625
626
627
628
629
630
631

632
633
634
635
636
637
638
639
640
641

642
643
644
645
646
647
648
#endif /* SQLITE_OMIT_EXPLAIN */
  {
    db->nVdbeExec++;
    rc = sqlite3VdbeExec(p);
    db->nVdbeExec--;
  }


#ifndef SQLITE_OMIT_TRACE
  /* If the statement completed successfully, invoke the profile callback */
  if( rc!=SQLITE_ROW ) checkProfileCallback(db, p);
#endif

  if( rc==SQLITE_DONE && db->autoCommit ){
    assert( p->rc==SQLITE_OK );
    p->rc = doWalCallbacks(db);
    if( p->rc!=SQLITE_OK ){
      rc = SQLITE_ERROR;

    }
  }

  db->errCode = rc;
  if( SQLITE_NOMEM==sqlite3ApiExit(p->db, p->rc) ){
    p->rc = SQLITE_NOMEM_BKPT;
  }







>

|
|


|
|
|
|
|
>







627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
#endif /* SQLITE_OMIT_EXPLAIN */
  {
    db->nVdbeExec++;
    rc = sqlite3VdbeExec(p);
    db->nVdbeExec--;
  }

  if( rc!=SQLITE_ROW ){
#ifndef SQLITE_OMIT_TRACE
    /* If the statement completed successfully, invoke the profile callback */
    checkProfileCallback(db, p);
#endif

    if( rc==SQLITE_DONE && db->autoCommit ){
      assert( p->rc==SQLITE_OK );
      p->rc = doWalCallbacks(db);
      if( p->rc!=SQLITE_OK ){
        rc = SQLITE_ERROR;
      }
    }
  }

  db->errCode = rc;
  if( SQLITE_NOMEM==sqlite3ApiExit(p->db, p->rc) ){
    p->rc = SQLITE_NOMEM_BKPT;
  }
1704
1705
1706
1707
1708
1709
1710




1711
1712
1713
1714
1715
1716
1717
1718

#ifdef SQLITE_ENABLE_NORMALIZE
/*
** Return the normalized SQL associated with a prepared statement.
*/
const char *sqlite3_normalized_sql(sqlite3_stmt *pStmt){
  Vdbe *p = (Vdbe *)pStmt;




  return p ? p->zNormSql : 0;
}
#endif /* SQLITE_ENABLE_NORMALIZE */

#ifdef SQLITE_ENABLE_PREUPDATE_HOOK
/*
** Allocate and populate an UnpackedRecord structure based on the serialized
** record in nKey/pKey. Return a pointer to the new UnpackedRecord structure







>
>
>
>
|







1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726

#ifdef SQLITE_ENABLE_NORMALIZE
/*
** Return the normalized SQL associated with a prepared statement.
*/
const char *sqlite3_normalized_sql(sqlite3_stmt *pStmt){
  Vdbe *p = (Vdbe *)pStmt;
  if( p==0 ) return 0;
  if( p->zNormSql==0 && p->zSql!=0 ){
    p->zNormSql = sqlite3Normalize(p, p->zSql, sqlite3Strlen30(p->zSql));
  }
  return p->zNormSql;
}
#endif /* SQLITE_ENABLE_NORMALIZE */

#ifdef SQLITE_ENABLE_PREUPDATE_HOOK
/*
** Allocate and populate an UnpackedRecord structure based on the serialized
** record in nKey/pKey. Return a pointer to the new UnpackedRecord structure
Changes to src/vdbeaux.c.
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
    p->expmask = 0;
  }
  assert( p->zSql==0 );
  p->zSql = sqlite3DbStrNDup(p->db, z, n);
#ifdef SQLITE_ENABLE_NORMALIZE
  assert( p->zNormSql==0 );
  if( p->zSql && (prepFlags & SQLITE_PREPARE_NORMALIZE)!=0 ){
    sqlite3Normalize(p, p->zSql, n, prepFlags);
    assert( p->zNormSql!=0 || p->db->mallocFailed );
  }
#endif
}

/*
** Swap all content between two VDBE structures.







|







63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
    p->expmask = 0;
  }
  assert( p->zSql==0 );
  p->zSql = sqlite3DbStrNDup(p->db, z, n);
#ifdef SQLITE_ENABLE_NORMALIZE
  assert( p->zNormSql==0 );
  if( p->zSql && (prepFlags & SQLITE_PREPARE_NORMALIZE)!=0 ){
    p->zNormSql = sqlite3Normalize(p, p->zSql, n);
    assert( p->zNormSql!=0 || p->db->mallocFailed );
  }
#endif
}

/*
** Swap all content between two VDBE structures.
Added test/fts4umlaut.test.


































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# 2018 December 3
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS5 module.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl
set testprefix fts4umlaut

ifcapable !fts3 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  CREATE VIRTUAL TABLE t2 USING fts4(
      x, 
      tokenize=unicode61 "remove_diacritics=2"
  );
}

foreach {tn q res1 res2} {
  1 "Hà Nội"                  0 1
  2 "Hà Noi"                  1 1
  3 "Ha Noi"                  1 1
  4 "Ha N\u1ed9i"             0 1
  5 "Ha N\u006fi"             1 1
  6 "Ha N\u006f\u0302i"       1 1
  7 "Ha N\u006f\u0323\u0302i" 1 1
} {
  do_execsql_test 1.$tn.1 {
    DELETE FROM t1;
    INSERT INTO t1(rowid, x) VALUES (1, 'Ha Noi');
    SELECT count(*) FROM t1 WHERE t1 MATCH $q
  } $res1
  do_execsql_test 1.$tn.2 {
    DELETE FROM t1;
    INSERT INTO t1(rowid, x) VALUES (1, $q);
    SELECT count(*) FROM t1 WHERE t1 MATCH 'Ha Noi'
  } $res1

  do_execsql_test 1.$tn.2 {
    DELETE FROM t2;
    INSERT INTO t2(rowid, x) VALUES (1, 'Ha Noi');
    SELECT count(*) FROM t2 WHERE t2 MATCH $q
  } $res2
  do_execsql_test 1.$tn.2 {
    DELETE FROM t2;
    INSERT INTO t2(rowid, x) VALUES (1, $q);
    SELECT count(*) FROM t2 WHERE t2 MATCH 'Ha Noi'
  } $res2
}

finish_test

Changes to test/json101.test.
809
810
811
812
813
814
815



816














817
818
do_execsql_test json-14.160 {
  SELECT fullkey FROM json_tree('"hello"');
} {$}
do_execsql_test json-14.170 {
  SELECT fullkey FROM json_tree('null');
} {$}




















finish_test







>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>


809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
do_execsql_test json-14.160 {
  SELECT fullkey FROM json_tree('"hello"');
} {$}
do_execsql_test json-14.170 {
  SELECT fullkey FROM json_tree('null');
} {$}

# 2018-12-03
# Make sure the table-valued functions contained within parentheses
# work correctly.
#
# Bug reported via private email. See TH3 for more information.
#
do_execsql_test json-15.100 {
  SELECT * FROM JSON_EACH('{"a":1, "b":2}');
} {a 1 integer 1 2 {} {$.a} {$} b 2 integer 2 4 {} {$.b} {$}}
do_execsql_test json-15.110 {
  SELECT xyz.* FROM JSON_EACH('{"a":1, "b":2}') AS xyz;
} {a 1 integer 1 2 {} {$.a} {$} b 2 integer 2 4 {} {$.b} {$}}
do_execsql_test json-15.120 {
  SELECT * FROM (JSON_EACH('{"a":1, "b":2}'));
} {a 1 integer 1 2 {} {$.a} {$} b 2 integer 2 4 {} {$.b} {$}}
do_execsql_test json-15.130 {
  SELECT xyz.* FROM (JSON_EACH('{"a":1, "b":2}')) AS xyz;
} {a 1 integer 1 2 {} {$.a} {$} b 2 integer 2 4 {} {$.b} {$}}

finish_test
Changes to test/shell4.test.
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
}]
  list [regexp {Memory Used} $res] \
       [regexp {Heap Usage} $res] \
       [regexp {Autoindex Inserts} $res]
} {1 1 1}

do_test shell4-2.1 {
  catchcmd ":memory:" "CREATE TABLE t1(x);\n.trace"
} {1 {Usage: .trace FILE|off}}
do_test shell4-2.2 {
  catchcmd ":memory:" "CREATE TABLE t1(x);\n.trace off\n.trace off\n"
} {0 {}}
do_test shell4-2.3 {
  catchcmd ":memory:" ".trace stdout\n.trace\n.trace off\n.dump\n"
} {/^1 {PRAGMA.*Usage:.*}$/}
ifcapable trace {
do_test shell4-2.4 {
  catchcmd ":memory:" ".trace stdout\nCREATE TABLE t1(x);SELECT * FROM t1;"
} {0 {CREATE TABLE t1(x);
SELECT * FROM t1;}}
do_test shell4-2.5 {
  catchcmd ":memory:" "CREATE TABLE t1(x);\n.trace stdout\nSELECT * FROM t1;"







|
|




|
|







103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
}]
  list [regexp {Memory Used} $res] \
       [regexp {Heap Usage} $res] \
       [regexp {Autoindex Inserts} $res]
} {1 1 1}

do_test shell4-2.1 {
  catchcmd ":memory:" "CREATE TABLE t1(x);\n.trace --unknown"
} {1 {Unknown option "--unknown" on ".trace"}}
do_test shell4-2.2 {
  catchcmd ":memory:" "CREATE TABLE t1(x);\n.trace off\n.trace off\n"
} {0 {}}
do_test shell4-2.3 {
  catchcmd ":memory:" ".trace stdout\n.dump\n.trace off\n"
} {/^0 {PRAGMA.*}$/}
ifcapable trace {
do_test shell4-2.4 {
  catchcmd ":memory:" ".trace stdout\nCREATE TABLE t1(x);SELECT * FROM t1;"
} {0 {CREATE TABLE t1(x);
SELECT * FROM t1;}}
do_test shell4-2.5 {
  catchcmd ":memory:" "CREATE TABLE t1(x);\n.trace stdout\nSELECT * FROM t1;"
Changes to test/tabfunc01.test.
120
121
122
123
124
125
126





127
128
129
130
131
132
133
  SELECT * FROM temp.generate_series(1,4)
} {1 2 3 4}
do_execsql_test tabfunc01-4.3 {
  ATTACH ':memory:' AS aux1;
  CREATE TABLE aux1.t1(a,b,c);
  SELECT * FROM aux1.generate_series(1,4)
} {1 2 3 4}






# The next series of tests is verifying that virtual table are able
# to optimize the IN operator, even on terms that are not marked "omit".
# When the generate_series virtual table is compiled for the testfixture,
# the special -DSQLITE_SERIES_CONSTRAINT_VERIFY=1 option is used, which
# causes the xBestIndex method of generate_series to leave the
# sqlite3_index_constraint_usage.omit flag set to 0, which should cause







>
>
>
>
>







120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
  SELECT * FROM temp.generate_series(1,4)
} {1 2 3 4}
do_execsql_test tabfunc01-4.3 {
  ATTACH ':memory:' AS aux1;
  CREATE TABLE aux1.t1(a,b,c);
  SELECT * FROM aux1.generate_series(1,4)
} {1 2 3 4}

# 2018-12-03: Fix bug reported by by private email.
do_execsql_test tabfunc01-4.4 {
  SELECT * FROM (generate_series(1,5,2)) AS x LIMIT 10;
} {1 3 5}

# The next series of tests is verifying that virtual table are able
# to optimize the IN operator, even on terms that are not marked "omit".
# When the generate_series virtual table is compiled for the testfixture,
# the special -DSQLITE_SERIES_CONSTRAINT_VERIFY=1 option is used, which
# causes the xBestIndex method of generate_series to leave the
# sqlite3_index_constraint_usage.omit flag set to 0, which should cause
Added tool/index_usage.c.








































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
/*
** 2018-12-04
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** 
** This file implements a utility program used to help determine which
** indexes in a database schema are used and unused, and how often specific
** indexes are used.
*/
#include "sqlite3.h"
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>

static void usage(const char *argv0){
  printf("Usage: %s DATABASE LOG\n\n", argv0);
  printf(
    "DATABASE is an SQLite database against which various statements\n"
    "have been run.  The SQL text is stored in LOG.  LOG is an SQLite\n"
    "database with this schema:\n"
    "\n"
    "    CREATE TABLE sqllog(sql TEXT);\n"
    "\n"
    "This utility program analyzes statements contained in LOG and prints\n"
    "a report showing how many times each index in DATABASE is used by the\n"
    "statements in LOG.\n"
    "\n"
    "DATABASE only needs to contain the schema used by the statements in\n"
    "LOG. The content can be removed from DATABASE.\n"
  );
  printf("\nAnalysis will be done by SQLite version %s dated %.20s\n"
         "checkin number %.40s. Different versions\n"
         "of SQLite might use different indexes.\n",
         sqlite3_libversion(), sqlite3_sourceid(), sqlite3_sourceid()+21);
  exit(1);
}

int main(int argc, char **argv){
  sqlite3 *db = 0;          /* The main database */
  sqlite3_stmt *pStmt = 0;  /* a query */
  char *zSql;
  int nErr = 0;
  int rc;

  if( argc!=3 ) usage(argv[0]);
  rc = sqlite3_open_v2(argv[1], &db, SQLITE_OPEN_READONLY, 0);
  if( rc ){
    printf("Cannot open \"%s\" for reading: %s\n", argv[1], sqlite3_errmsg(db));
    goto errorOut;
  }
  rc = sqlite3_prepare_v2(db, "SELECT * FROM sqlite_master", -1, &pStmt, 0);
  if( rc ){
    printf("Cannot read the schema from \"%s\" - %s\n", argv[1],
           sqlite3_errmsg(db));
    goto errorOut;
  }
  sqlite3_finalize(pStmt);
  pStmt = 0;
  rc = sqlite3_exec(db, 
     "CREATE TABLE temp.idxu(\n"
     "  tbl TEXT,\n"
     "  idx TEXT,\n"
     "  cnt INT,\n"
     "  PRIMARY KEY(idx)\n"
     ") WITHOUT ROWID;", 0, 0, 0);
  if( rc ){
    printf("Cannot create the result table - %s\n",
           sqlite3_errmsg(db));
    goto errorOut;
  }
  rc = sqlite3_exec(db,
     "INSERT INTO temp.idxu(tbl,idx,cnt)"
     " SELECT tbl_name, name, 0 FROM sqlite_master"
     " WHERE type='index' AND sql IS NOT NULL", 0, 0, 0);

  /* Open the LOG database */
  zSql = sqlite3_mprintf("ATTACH %Q AS log", argv[2]);
  rc = sqlite3_exec(db, zSql, 0, 0, 0);
  sqlite3_free(zSql);
  if( rc ){
    printf("Cannot open the LOG database \"%s\" - %s\n",
           argv[2], sqlite3_errmsg(db));
    goto errorOut;
  }
  rc = sqlite3_prepare_v2(db, "SELECT sql, rowid FROM log.sqllog",
                          -1, &pStmt, 0);
  if( rc ){
    printf("Cannot read the SQLLOG table in the LOG database \"%s\" - %s\n",
           argv[2], sqlite3_errmsg(db));
    goto errorOut;
  }

  /* Update the counts based on LOG */
  while( sqlite3_step(pStmt)==SQLITE_ROW ){
    const char *zLog = (const char*)sqlite3_column_text(pStmt, 0);
    sqlite3_stmt *pS2;
    if( zLog==0 ) continue;
    zSql = sqlite3_mprintf("EXPLAIN QUERY PLAN %s", zLog);
    rc = sqlite3_prepare_v2(db, zSql, -1, &pS2, 0);
    sqlite3_free(zSql);
    if( rc ){
      printf("Cannot compile LOG entry %d (%s): %s\n",
             sqlite3_column_int(pStmt, 1), zLog, sqlite3_errmsg(db));
      nErr++;
    }else{
      while( sqlite3_step(pS2)==SQLITE_ROW ){
        const char *zExplain = (const char*)sqlite3_column_text(pS2,3);
        const char *z1, *z2;
        int n;
        /* printf("EXPLAIN: %s\n", zExplain); */
        z1 = strstr(zExplain, " USING INDEX ");
        if( z1==0 ) continue;
        z1 += 13;
        for(z2=z1+1; z2[1] && z2[1]!='('; z2++){}
        n = z2 - z1;
        zSql = sqlite3_mprintf(
          "UPDATE temp.idxu SET cnt=cnt+1 WHERE idx='%.*q'", n, z1
        );
        /* printf("sql: %s\n", zSql); */
        sqlite3_exec(db, zSql, 0, 0, 0);
        sqlite3_free(zSql);
      }
    }
    sqlite3_finalize(pS2);
  }
  sqlite3_finalize(pStmt);

  /* Generate the report */
  rc = sqlite3_prepare_v2(db,
     "SELECT tbl, idx, cnt, "
     "   (SELECT group_concat(name,',') FROM pragma_index_info(idx))"
     " FROM temp.idxu, main.sqlite_master"
     " WHERE temp.idxu.tbl=main.sqlite_master.tbl_name"
     "   AND temp.idxu.idx=main.sqlite_master.name"
     " ORDER BY cnt DESC, tbl, idx",
     -1, &pStmt, 0);
  if( rc ){
    printf("Cannot query the result table - %s\n",
           sqlite3_errmsg(db));
    goto errorOut;
  }
  while( sqlite3_step(pStmt)==SQLITE_ROW ){
    printf("%10d %s on %s(%s)\n", 
       sqlite3_column_int(pStmt, 2),
       sqlite3_column_text(pStmt, 1),
       sqlite3_column_text(pStmt, 0),
       sqlite3_column_text(pStmt, 3));
  }
  sqlite3_finalize(pStmt);
  pStmt = 0;

errorOut:
  sqlite3_finalize(pStmt);
  sqlite3_close(db);
  return nErr;
}
Changes to tool/lemon.c.
4586
4587
4588
4589
4590
4591
4592
4593

4594
4595
4596
4597
4598


4599




4600
4601
4602
4603
4604
4605
4606
  }
  tplt_xfer(lemp->name,in,out,&lineno);

  /* Generate code which executes whenever the parser stack overflows */
  tplt_print(out,lemp,lemp->overflow,&lineno);
  tplt_xfer(lemp->name,in,out,&lineno);

  /* Generate the table of rule information

  **
  ** Note: This code depends on the fact that rules are number
  ** sequentually beginning with 0.
  */
  for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){


    fprintf(out,"  { %4d, %4d }, /* (%d) ",rp->lhs->index,-rp->nrhs,i);




    rule_print(out, rp);
    fprintf(out," */\n"); lineno++;
  }
  tplt_xfer(lemp->name,in,out,&lineno);

  /* Generate code which execution during each REDUCE action */
  i = 0;







|
>





>
>
|
>
>
>
>







4586
4587
4588
4589
4590
4591
4592
4593
4594
4595
4596
4597
4598
4599
4600
4601
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613
  }
  tplt_xfer(lemp->name,in,out,&lineno);

  /* Generate code which executes whenever the parser stack overflows */
  tplt_print(out,lemp,lemp->overflow,&lineno);
  tplt_xfer(lemp->name,in,out,&lineno);

  /* Generate the tables of rule information.  yyRuleInfoLhs[] and
  ** yyRuleInfoNRhs[].
  **
  ** Note: This code depends on the fact that rules are number
  ** sequentually beginning with 0.
  */
  for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){
    fprintf(out,"  %4d,  /* (%d) ", rp->lhs->index, i);
     rule_print(out, rp);
    fprintf(out," */\n"); lineno++;
  }
  tplt_xfer(lemp->name,in,out,&lineno);
  for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){
    fprintf(out,"  %3d,  /* (%d) ", -rp->nrhs, i);
    rule_print(out, rp);
    fprintf(out," */\n"); lineno++;
  }
  tplt_xfer(lemp->name,in,out,&lineno);

  /* Generate code which execution during each REDUCE action */
  i = 0;
Changes to tool/lempar.c.
682
683
684
685
686
687
688
689
690
691
692


693
694

695
696
697
698
699
700
701
702
  yytos = yypParser->yytos;
  yytos->stateno = yyNewState;
  yytos->major = yyMajor;
  yytos->minor.yy0 = yyMinor;
  yyTraceShift(yypParser, yyNewState, "Shift");
}

/* The following table contains information about every rule that
** is used during the reduce.
*/
static const struct {


  YYCODETYPE lhs;       /* Symbol on the left-hand side of the rule */
  signed char nrhs;     /* Negative of the number of RHS symbols in the rule */

} yyRuleInfo[] = {
%%
};

static void yy_accept(yyParser*);  /* Forward Declaration */

/*
** Perform a reduce action and the shift that must immediately







|
|
<
|
>
>
|
|
>
|







682
683
684
685
686
687
688
689
690

691
692
693
694
695
696
697
698
699
700
701
702
703
704
  yytos = yypParser->yytos;
  yytos->stateno = yyNewState;
  yytos->major = yyMajor;
  yytos->minor.yy0 = yyMinor;
  yyTraceShift(yypParser, yyNewState, "Shift");
}

/* For rule J, yyRuleInfoLhs[J] contains the symbol on the left-hand side
** of that rule */

static const YYCODETYPE yyRuleInfoLhs[] = {
%%
};

/* For rule J, yyRuleInfoNRhs[J] contains the negative of the number
** of symbols on the right-hand side of that rule. */
static const signed char yyRuleInfoNRhs[] = {
%%
};

static void yy_accept(yyParser*);  /* Forward Declaration */

/*
** Perform a reduce action and the shift that must immediately
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
  int yysize;                     /* Amount to pop the stack */
  ParseARG_FETCH
  (void)yyLookahead;
  (void)yyLookaheadToken;
  yymsp = yypParser->yytos;
#ifndef NDEBUG
  if( yyTraceFILE && yyruleno<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ){
    yysize = yyRuleInfo[yyruleno].nrhs;
    if( yysize ){
      fprintf(yyTraceFILE, "%sReduce %d [%s], go to state %d.\n",
        yyTracePrompt,
        yyruleno, yyRuleName[yyruleno], yymsp[yysize].stateno);
    }else{
      fprintf(yyTraceFILE, "%sReduce %d [%s].\n",
        yyTracePrompt, yyruleno, yyRuleName[yyruleno]);
    }
  }
#endif /* NDEBUG */

  /* Check that the stack is large enough to grow by a single entry
  ** if the RHS of the rule is empty.  This ensures that there is room
  ** enough on the stack to push the LHS value */
  if( yyRuleInfo[yyruleno].nrhs==0 ){
#ifdef YYTRACKMAXSTACKDEPTH
    if( (int)(yypParser->yytos - yypParser->yystack)>yypParser->yyhwm ){
      yypParser->yyhwm++;
      assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack));
    }
#endif
#if YYSTACKDEPTH>0 







|














|







723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
  int yysize;                     /* Amount to pop the stack */
  ParseARG_FETCH
  (void)yyLookahead;
  (void)yyLookaheadToken;
  yymsp = yypParser->yytos;
#ifndef NDEBUG
  if( yyTraceFILE && yyruleno<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ){
    yysize = yyRuleInfoNRhs[yyruleno];
    if( yysize ){
      fprintf(yyTraceFILE, "%sReduce %d [%s], go to state %d.\n",
        yyTracePrompt,
        yyruleno, yyRuleName[yyruleno], yymsp[yysize].stateno);
    }else{
      fprintf(yyTraceFILE, "%sReduce %d [%s].\n",
        yyTracePrompt, yyruleno, yyRuleName[yyruleno]);
    }
  }
#endif /* NDEBUG */

  /* Check that the stack is large enough to grow by a single entry
  ** if the RHS of the rule is empty.  This ensures that there is room
  ** enough on the stack to push the LHS value */
  if( yyRuleInfoNRhs[yyruleno]==0 ){
#ifdef YYTRACKMAXSTACKDEPTH
    if( (int)(yypParser->yytos - yypParser->yystack)>yypParser->yyhwm ){
      yypParser->yyhwm++;
      assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack));
    }
#endif
#if YYSTACKDEPTH>0 
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
  **  #line <lineno> <thisfile>
  **     break;
  */
/********** Begin reduce actions **********************************************/
%%
/********** End reduce actions ************************************************/
  };
  assert( yyruleno<sizeof(yyRuleInfo)/sizeof(yyRuleInfo[0]) );
  yygoto = yyRuleInfo[yyruleno].lhs;
  yysize = yyRuleInfo[yyruleno].nrhs;
  yyact = yy_find_reduce_action(yymsp[yysize].stateno,(YYCODETYPE)yygoto);

  /* There are no SHIFTREDUCE actions on nonterminals because the table
  ** generator has simplified them to pure REDUCE actions. */
  assert( !(yyact>YY_MAX_SHIFT && yyact<=YY_MAX_SHIFTREDUCE) );

  /* It is not possible for a REDUCE to be followed by an error */







|
|
|







780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
  **  #line <lineno> <thisfile>
  **     break;
  */
/********** Begin reduce actions **********************************************/
%%
/********** End reduce actions ************************************************/
  };
  assert( yyruleno<sizeof(yyRuleInfoLhs)/sizeof(yyRuleInfoLhs[0]) );
  yygoto = yyRuleInfoLhs[yyruleno];
  yysize = yyRuleInfoNRhs[yyruleno];
  yyact = yy_find_reduce_action(yymsp[yysize].stateno,(YYCODETYPE)yygoto);

  /* There are no SHIFTREDUCE actions on nonterminals because the table
  ** generator has simplified them to pure REDUCE actions. */
  assert( !(yyact>YY_MAX_SHIFT && yyact<=YY_MAX_SHIFTREDUCE) );

  /* It is not possible for a REDUCE to be followed by an error */