/ Check-in [e5540ce0]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Optimizations to the tokenizer. (CVS 2011)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:e5540ce047e0215904005bc9df4ff0d1d0a3c1d1
User & Date: drh 2004-10-07 19:03:01
Context
2004-10-07
22:22
Sort the output of glob in test script attach2.test. Ticket #948. (CVS 2012) check-in: 3d04eef9 user: drh tags: trunk
19:03
Optimizations to the tokenizer. (CVS 2011) check-in: e5540ce0 user: drh tags: trunk
03:06
Additional parser optimizations. (CVS 2010) check-in: 618dee12 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/tokenize.c.

11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27

28
29
30
31
32
33
34
35
36
37
38

39



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107

108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136

137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176





177
178
179
180
181
182
183
184
185
186
187
188
189
190
*************************************************************************
** An tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.c,v 1.90 2004/10/05 02:41:43 drh Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include <stdlib.h>

/*
** All the keywords of the SQL language are stored as in a hash
** table composed of instances of the following structure.

*/
typedef struct Keyword Keyword;
struct Keyword {
  char *zName;             /* The keyword name */
  u8 tokenType;            /* Token value for this keyword */
  u8 len;                  /* Length of this keyword */
  u8 iNext;                /* Index in aKeywordTable[] of next with same hash */
};

/*
** These are the keywords

*/



static Keyword aKeywordTable[] = {
  { "ABORT",             TK_ABORT,        },
  { "AFTER",             TK_AFTER,        },
  { "ALL",               TK_ALL,          },
  { "AND",               TK_AND,          },
  { "AS",                TK_AS,           },
  { "ASC",               TK_ASC,          },
  { "ATTACH",            TK_ATTACH,       },
  { "BEFORE",            TK_BEFORE,       },
  { "BEGIN",             TK_BEGIN,        },
  { "BETWEEN",           TK_BETWEEN,      },
  { "BY",                TK_BY,           },
  { "CASCADE",           TK_CASCADE,      },
  { "CASE",              TK_CASE,         },
  { "CHECK",             TK_CHECK,        },
  { "COLLATE",           TK_COLLATE,      },
  { "COMMIT",            TK_COMMIT,       },
  { "CONFLICT",          TK_CONFLICT,     },
  { "CONSTRAINT",        TK_CONSTRAINT,   },
  { "CREATE",            TK_CREATE,       },
  { "CROSS",             TK_JOIN_KW,      },
  { "DATABASE",          TK_DATABASE,     },
  { "DEFAULT",           TK_DEFAULT,      },
  { "DEFERRED",          TK_DEFERRED,     },
  { "DEFERRABLE",        TK_DEFERRABLE,   },
  { "DELETE",            TK_DELETE,       },
  { "DESC",              TK_DESC,         },
  { "DETACH",            TK_DETACH,       },
  { "DISTINCT",          TK_DISTINCT,     },
  { "DROP",              TK_DROP,         },
  { "END",               TK_END,          },
  { "EACH",              TK_EACH,         },
  { "ELSE",              TK_ELSE,         },
  { "EXCEPT",            TK_EXCEPT,       },
  { "EXCLUSIVE",         TK_EXCLUSIVE,    },
  { "EXPLAIN",           TK_EXPLAIN,      },
  { "FAIL",              TK_FAIL,         },
  { "FOR",               TK_FOR,          },
  { "FOREIGN",           TK_FOREIGN,      },
  { "FROM",              TK_FROM,         },
  { "FULL",              TK_JOIN_KW,      },
  { "GLOB",              TK_GLOB,         },
  { "GROUP",             TK_GROUP,        },
  { "HAVING",            TK_HAVING,       },
  { "IGNORE",            TK_IGNORE,       },
  { "IMMEDIATE",         TK_IMMEDIATE,    },
  { "IN",                TK_IN,           },
  { "INDEX",             TK_INDEX,        },
  { "INITIALLY",         TK_INITIALLY,    },
  { "INNER",             TK_JOIN_KW,      },
  { "INSERT",            TK_INSERT,       },
  { "INSTEAD",           TK_INSTEAD,      },
  { "INTERSECT",         TK_INTERSECT,    },
  { "INTO",              TK_INTO,         },
  { "IS",                TK_IS,           },
  { "ISNULL",            TK_ISNULL,       },
  { "JOIN",              TK_JOIN,         },
  { "KEY",               TK_KEY,          },
  { "LEFT",              TK_JOIN_KW,      },
  { "LIKE",              TK_LIKE,         },
  { "LIMIT",             TK_LIMIT,        },
  { "MATCH",             TK_MATCH,        },
  { "NATURAL",           TK_JOIN_KW,      },
  { "NOT",               TK_NOT,          },
  { "NOTNULL",           TK_NOTNULL,      },
  { "NULL",              TK_NULL,         },
  { "OF",                TK_OF,           },
  { "OFFSET",            TK_OFFSET,       },

  { "ON",                TK_ON,           },
  { "OR",                TK_OR,           },
  { "ORDER",             TK_ORDER,        },
  { "OUTER",             TK_JOIN_KW,      },
  { "PRAGMA",            TK_PRAGMA,       },
  { "PRIMARY",           TK_PRIMARY,      },
  { "RAISE",             TK_RAISE,        },
  { "REFERENCES",        TK_REFERENCES,   },
  { "REPLACE",           TK_REPLACE,      },
  { "RESTRICT",          TK_RESTRICT,     },
  { "RIGHT",             TK_JOIN_KW,      },
  { "ROLLBACK",          TK_ROLLBACK,     },
  { "ROW",               TK_ROW,          },
  { "SELECT",            TK_SELECT,       },
  { "SET",               TK_SET,          },
  { "STATEMENT",         TK_STATEMENT,    },
  { "TABLE",             TK_TABLE,        },
  { "TEMP",              TK_TEMP,         },
  { "TEMPORARY",         TK_TEMP,         },
  { "THEN",              TK_THEN,         },
  { "TRANSACTION",       TK_TRANSACTION,  },
  { "TRIGGER",           TK_TRIGGER,      },
  { "UNION",             TK_UNION,        },
  { "UNIQUE",            TK_UNIQUE,       },
  { "UPDATE",            TK_UPDATE,       },
  { "USING",             TK_USING,        },
  { "VACUUM",            TK_VACUUM,       },
  { "VALUES",            TK_VALUES,       },
  { "VIEW",              TK_VIEW,         },

  { "WHEN",              TK_WHEN,         },
  { "WHERE",             TK_WHERE,        },
};

/*
** This is the hash table
*/
#define KEY_HASH_SIZE 101
static u8 aiHashTable[KEY_HASH_SIZE];


/*
** This function looks up an identifier to determine if it is a
** keyword.  If it is a keyword, the token code of that keyword is 
** returned.  If the input is not a keyword, TK_ID is returned.
*/
int sqlite3KeywordCode(const char *z, int n){
  int h, i;
  Keyword *p;
  static char needInit = 1;
  if( needInit ){
    /* Initialize the keyword hash table */
    sqlite3OsEnterMutex();
    if( needInit ){
      int nk;
      nk = sizeof(aKeywordTable)/sizeof(aKeywordTable[0]);
      for(i=0, p=aKeywordTable; i<nk; i++, p++){
        const char *zName = p->zName;
        int len = p->len = strlen(zName);
        h = sqlite3HashNoCase(zName, len) % KEY_HASH_SIZE;
        p->iNext = aiHashTable[h];
        aiHashTable[h] = i+1;
      }
      needInit = 0;
    }
    sqlite3OsLeaveMutex();
  }
  h = sqlite3HashNoCase(z, n) % KEY_HASH_SIZE;
  for(i=aiHashTable[h]; i; i=p->iNext){
    p = &aKeywordTable[i-1];





    if( p->len==n && sqlite3StrNICmp(p->zName, z, n)==0 ){
      return p->tokenType;
    }
  }
  return TK_ID;
}


/*
** If X is a character that can be used in an identifier and
** X&0x80==0 then isIdChar[X] will be 1.  If X&0x80==0x80 then
** X is always an identifier character.  (Hence all UTF-8
** characters can be part of an identifier).  isIdChar[X] will
** be 0 for every character in the lower 128 ASCII characters







|







|
|
>
|
<
|
|
|
|
|
<
<
<
<
>

>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<
|
>
|
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
|
<
<
<
<
<
<
<
<
>
|
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
>
>
>
>
>
|
|




<







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29

30
31
32
33
34




35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105

106
107
108













109





110








111
112

113














114






















115
116
117
118
119
120
121
122
123
124
125

126
127
128
129
130
131
132
*************************************************************************
** An tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.c,v 1.91 2004/10/07 19:03:01 drh Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include <stdlib.h>

/*
** This function looks up an identifier to determine if it is a
** keyword.  If it is a keyword, the token code of that keyword is 
** returned.  If the input is not a keyword, TK_ID is returned.
**

** The implementation of this routine was generated by a program,
** mkkeywordhash.c, located in the tool subdirectory of the distribution.
** The output of the mkkeywordhash.c program was manually cut and pasted
** into this file.  When the set of keywords for SQLite changes, you
** must modify the mkkeywordhash.c program (to add or remove keywords from




** the data tables) then rerun that program to regenerate this function.
*/
int sqlite3KeywordCode(const char *z, int n){
  static const char zText[519] =
    "ABORTAFTERALLANDASCATTACHBEFOREBEGINBETWEENBYCASCADECASECHECK"
    "COLLATECOMMITCONFLICTCONSTRAINTCREATECROSSDATABASEDEFAULTDEFERRABLE"
    "DEFERREDDELETEDESCDETACHDISTINCTDROPEACHELSEENDEXCEPTEXCLUSIVE"
    "EXPLAINFAILFOREIGNFROMFULLGLOBGROUPHAVINGIGNOREIMMEDIATEINDEX"
    "INITIALLYINNERINSERTINSTEADINTERSECTINTOISNULLJOINKEYLEFTLIKE"
    "LIMITMATCHNATURALNOTNULLNULLOFFSETONORDEROUTERPRAGMAPRIMARYRAISE"
    "REFERENCESREPLACERESTRICTRIGHTROLLBACKROWSELECTSETSTATEMENTTABLE"
    "TEMPORARYTHENTRANSACTIONTRIGGERUNIONUNIQUEUPDATEUSINGVACUUMVALUES"
    "VIEWWHENWHERE";
  static const unsigned char aHash[154] = {
       0,  75,  82,   0,   0,  97,  80,   0,  83,   0,   0,   0,   0,
       0,   0,   6,   0,  95,   4,   0,   0,   0,   0,   0,   0,   0,
       0,  96,  86,   8,   0,  26,  13,   7,  19,  15,   0,   0,  32,
      25,   0,  21,  31,  41,   0,   0,   0,  34,  27,   0,   0,  30,
       0,   0,   0,   9,   0,  10,   0,   0,   0,   0,  51,   0,  44,
      43,   0,  45,  40,   0,  29,  39,  35,   0,   0,  20,   0,  59,
       0,  16,   0,  17,   0,  18,   0,  55,  42,  72,   0,  33,   0,
       0,  61,  66,  56,   0,   0,   0,   0,   0,   0,   0,  54,   0,
       0,   0,   0,   0,  74,  50,  76,  64,  52,   0,   0,   0,   0,
      68,  84,   0,  47,   0,  58,  60,  92,   0,   0,  48,   0,  93,
       0,  63,  71,  98,   0,   0,   0,   0,   0,  67,   0,   0,   0,
       0,  87,   0,   0,   0,   0,   0,  90,  88,   0,  94,
  };
  static const unsigned char aNext[98] = {
       0,   0,   0,   0,   2,   0,   0,   0,   0,   0,   0,   0,   0,
       0,  12,   0,   0,   0,   0,   0,   0,  11,   0,   0,   0,   0,
       0,   0,   0,  14,   3,  24,   0,   0,   0,   1,  22,   0,   0,
      36,  23,  28,   0,   0,   0,   0,   0,   0,   0,   0,   5,   0,
       0,  49,  37,   0,   0,   0,  38,   0,  53,   0,  57,  62,   0,
       0,   0,   0,   0,   0,  70,  46,   0,  65,   0,   0,   0,   0,
      69,  73,   0,  77,   0,   0,   0,   0,   0,   0,  81,  85,   0,
      91,  79,  78,   0,   0,  89,   0,
  };
  static const unsigned char aLen[98] = {
       5,   5,   3,   3,   2,   3,   6,   6,   5,   7,   2,   7,   4,
       5,   7,   6,   8,  10,   6,   5,   8,   7,  10,   8,   6,   4,
       6,   8,   4,   4,   4,   3,   6,   9,   7,   4,   3,   7,   4,
       4,   4,   5,   6,   6,   9,   2,   5,   9,   5,   6,   7,   9,
       4,   2,   6,   4,   3,   4,   4,   5,   5,   7,   3,   7,   4,
       2,   6,   2,   2,   5,   5,   6,   7,   5,  10,   7,   8,   5,
       8,   3,   6,   3,   9,   5,   4,   9,   4,  11,   7,   5,   6,
       6,   5,   6,   6,   4,   4,   5,
  };
  static const unsigned short int aOffset[98] = {
       0,   5,  10,  13,  16,  16,  19,  25,  31,  36,  43,  45,  52,
      56,  61,  68,  74,  82,  92,  98, 103, 111, 118, 128, 136, 142,
     146, 152, 160, 164, 168, 172, 175, 181, 190, 197, 201, 201, 208,
     212, 216, 220, 225, 231, 237, 246, 246, 251, 260, 265, 271, 278,
     287, 291, 291, 297, 301, 304, 308, 312, 317, 322, 329, 329, 336,
     340, 340, 346, 348, 348, 353, 358, 364, 371, 376, 386, 393, 401,
     406, 414, 417, 423, 426, 435, 440, 440, 449, 453, 464, 471, 476,
     482, 488, 493, 499, 505, 509, 513,
  };
  static const unsigned char aCode[98] = {
    TK_ABORT,      TK_AFTER,      TK_ALL,        TK_AND,        TK_AS,         
    TK_ASC,        TK_ATTACH,     TK_BEFORE,     TK_BEGIN,      TK_BETWEEN,    
    TK_BY,         TK_CASCADE,    TK_CASE,       TK_CHECK,      TK_COLLATE,    
    TK_COMMIT,     TK_CONFLICT,   TK_CONSTRAINT, TK_CREATE,     TK_JOIN_KW,    
    TK_DATABASE,   TK_DEFAULT,    TK_DEFERRABLE, TK_DEFERRED,   TK_DELETE,     
    TK_DESC,       TK_DETACH,     TK_DISTINCT,   TK_DROP,       TK_EACH,       
    TK_ELSE,       TK_END,        TK_EXCEPT,     TK_EXCLUSIVE,  TK_EXPLAIN,    
    TK_FAIL,       TK_FOR,        TK_FOREIGN,    TK_FROM,       TK_JOIN_KW,    
    TK_GLOB,       TK_GROUP,      TK_HAVING,     TK_IGNORE,     TK_IMMEDIATE,  
    TK_IN,         TK_INDEX,      TK_INITIALLY,  TK_JOIN_KW,    TK_INSERT,     
    TK_INSTEAD,    TK_INTERSECT,  TK_INTO,       TK_IS,         TK_ISNULL,     
    TK_JOIN,       TK_KEY,        TK_JOIN_KW,    TK_LIKE,       TK_LIMIT,      
    TK_MATCH,      TK_JOIN_KW,    TK_NOT,        TK_NOTNULL,    TK_NULL,       

    TK_OF,         TK_OFFSET,     TK_ON,         TK_OR,         TK_ORDER,      
    TK_JOIN_KW,    TK_PRAGMA,     TK_PRIMARY,    TK_RAISE,      TK_REFERENCES, 
    TK_REPLACE,    TK_RESTRICT,   TK_JOIN_KW,    TK_ROLLBACK,   TK_ROW,        













    TK_SELECT,     TK_SET,        TK_STATEMENT,  TK_TABLE,      TK_TEMP,       





    TK_TEMP,       TK_THEN,       TK_TRANSACTION,TK_TRIGGER,    TK_UNION,      








    TK_UNIQUE,     TK_UPDATE,     TK_USING,      TK_VACUUM,     TK_VALUES,     
    TK_VIEW,       TK_WHEN,       TK_WHERE,      

  };














  int h, i;






















  if( n<2 ) return TK_ID;
  h = (sqlite3UpperToLower[((unsigned char*)z)[0]]*5 + 
      sqlite3UpperToLower[((unsigned char*)z)[n-1]]*3 +
      n) % 154;
  for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){
    if( aLen[i]==n && sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){
      return aCode[i];
    }
  }
  return TK_ID;
}


/*
** If X is a character that can be used in an identifier and
** X&0x80==0 then isIdChar[X] will be 1.  If X&0x80==0x80 then
** X is always an identifier character.  (Hence all UTF-8
** characters can be part of an identifier).  isIdChar[X] will
** be 0 for every character in the lower 128 ASCII characters

Added tool/mkkeywordhash.c.

















































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
/*
** Compile and run this standalone program in order to generate code that
** implements a function that will translate alphabetic identifiers into
** parser token codes.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
** All the keywords of the SQL language are stored as in a hash
** table composed of instances of the following structure.
*/
typedef struct Keyword Keyword;
struct Keyword {
  char *zName;         /* The keyword name */
  char *zTokenType;    /* Token value for this keyword */
  int hash;            /* Hash on the keyword */
  int offset;          /* Offset to start of name string */
  int len;             /* Length of this keyword, not counting final \000 */
  int iNext;           /* Index in aKeywordTable[] of next with same hash */
};

/*
** These are the keywords
*/
static Keyword aKeywordTable[] = {
  { "ABORT",            "TK_ABORT",        },
  { "AFTER",            "TK_AFTER",        },
  { "ALL",              "TK_ALL",          },
  { "AND",              "TK_AND",          },
  { "AS",               "TK_AS",           },
  { "ASC",              "TK_ASC",          },
  { "ATTACH",           "TK_ATTACH",       },
  { "BEFORE",           "TK_BEFORE",       },
  { "BEGIN",            "TK_BEGIN",        },
  { "BETWEEN",          "TK_BETWEEN",      },
  { "BY",               "TK_BY",           },
  { "CASCADE",          "TK_CASCADE",      },
  { "CASE",             "TK_CASE",         },
  { "CHECK",            "TK_CHECK",        },
  { "COLLATE",          "TK_COLLATE",      },
  { "COMMIT",           "TK_COMMIT",       },
  { "CONFLICT",         "TK_CONFLICT",     },
  { "CONSTRAINT",       "TK_CONSTRAINT",   },
  { "CREATE",           "TK_CREATE",       },
  { "CROSS",            "TK_JOIN_KW",      },
  { "DATABASE",         "TK_DATABASE",     },
  { "DEFAULT",          "TK_DEFAULT",      },
  { "DEFERRED",         "TK_DEFERRED",     },
  { "DEFERRABLE",       "TK_DEFERRABLE",   },
  { "DELETE",           "TK_DELETE",       },
  { "DESC",             "TK_DESC",         },
  { "DETACH",           "TK_DETACH",       },
  { "DISTINCT",         "TK_DISTINCT",     },
  { "DROP",             "TK_DROP",         },
  { "END",              "TK_END",          },
  { "EACH",             "TK_EACH",         },
  { "ELSE",             "TK_ELSE",         },
  { "EXCEPT",           "TK_EXCEPT",       },
  { "EXCLUSIVE",        "TK_EXCLUSIVE",    },
  { "EXPLAIN",          "TK_EXPLAIN",      },
  { "FAIL",             "TK_FAIL",         },
  { "FOR",              "TK_FOR",          },
  { "FOREIGN",          "TK_FOREIGN",      },
  { "FROM",             "TK_FROM",         },
  { "FULL",             "TK_JOIN_KW",      },
  { "GLOB",             "TK_GLOB",         },
  { "GROUP",            "TK_GROUP",        },
  { "HAVING",           "TK_HAVING",       },
  { "IGNORE",           "TK_IGNORE",       },
  { "IMMEDIATE",        "TK_IMMEDIATE",    },
  { "IN",               "TK_IN",           },
  { "INDEX",            "TK_INDEX",        },
  { "INITIALLY",        "TK_INITIALLY",    },
  { "INNER",            "TK_JOIN_KW",      },
  { "INSERT",           "TK_INSERT",       },
  { "INSTEAD",          "TK_INSTEAD",      },
  { "INTERSECT",        "TK_INTERSECT",    },
  { "INTO",             "TK_INTO",         },
  { "IS",               "TK_IS",           },
  { "ISNULL",           "TK_ISNULL",       },
  { "JOIN",             "TK_JOIN",         },
  { "KEY",              "TK_KEY",          },
  { "LEFT",             "TK_JOIN_KW",      },
  { "LIKE",             "TK_LIKE",         },
  { "LIMIT",            "TK_LIMIT",        },
  { "MATCH",            "TK_MATCH",        },
  { "NATURAL",          "TK_JOIN_KW",      },
  { "NOT",              "TK_NOT",          },
  { "NOTNULL",          "TK_NOTNULL",      },
  { "NULL",             "TK_NULL",         },
  { "OF",               "TK_OF",           },
  { "OFFSET",           "TK_OFFSET",       },
  { "ON",               "TK_ON",           },
  { "OR",               "TK_OR",           },
  { "ORDER",            "TK_ORDER",        },
  { "OUTER",            "TK_JOIN_KW",      },
  { "PRAGMA",           "TK_PRAGMA",       },
  { "PRIMARY",          "TK_PRIMARY",      },
  { "RAISE",            "TK_RAISE",        },
  { "REFERENCES",       "TK_REFERENCES",   },
  { "REPLACE",          "TK_REPLACE",      },
  { "RESTRICT",         "TK_RESTRICT",     },
  { "RIGHT",            "TK_JOIN_KW",      },
  { "ROLLBACK",         "TK_ROLLBACK",     },
  { "ROW",              "TK_ROW",          },
  { "SELECT",           "TK_SELECT",       },
  { "SET",              "TK_SET",          },
  { "STATEMENT",        "TK_STATEMENT",    },
  { "TABLE",            "TK_TABLE",        },
  { "TEMP",             "TK_TEMP",         },
  { "TEMPORARY",        "TK_TEMP",         },
  { "THEN",             "TK_THEN",         },
  { "TRANSACTION",      "TK_TRANSACTION",  },
  { "TRIGGER",          "TK_TRIGGER",      },
  { "UNION",            "TK_UNION",        },
  { "UNIQUE",           "TK_UNIQUE",       },
  { "UPDATE",           "TK_UPDATE",       },
  { "USING",            "TK_USING",        },
  { "VACUUM",           "TK_VACUUM",       },
  { "VALUES",           "TK_VALUES",       },
  { "VIEW",             "TK_VIEW",         },
  { "WHEN",             "TK_WHEN",         },
  { "WHERE",            "TK_WHERE",        },
};

/* Number of keywords */
#define NKEYWORD (sizeof(aKeywordTable)/sizeof(aKeywordTable[0]))

/* An array to map all upper-case characters into their corresponding
** lower-case character. 
*/
const unsigned char sqlite3UpperToLower[] = {
      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
     18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
     36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
     54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103,
    104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,
    122, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,105,106,107,
    108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
    126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
    144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,
    162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,
    180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,
    198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,
    216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,
    234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,
    252,253,254,255
};
#define UpperToLower sqlite3UpperToLower

/*
** Comparision function for two Keyword records
*/
static int keywordCompare(const void *a, const void *b){
  const Keyword *pA = (Keyword*)a;
  const Keyword *pB = (Keyword*)b;
  return strcmp(pA->zName, pB->zName);
}

/*
** This routine does the work.  The generated code is printed on standard
** output.
*/
int main(int argc, char **argv){
  int i, j, h;
  int bestSize, bestCount;
  int count;
  int nChar;
  int aHash[1000];  /* 1000 is much bigger than NKEYWORD */

  /* Make sure the table is sorted */
  qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare);

  /* Fill in the hash value, length, and offset for all entries */
  nChar = 0;
  for(i=0; i<NKEYWORD; i++){
    Keyword *p = &aKeywordTable[i];
    p->len = strlen(p->zName);
    /* p->hash = sqlite3HashNoCase(p->zName, p->len); */
    p->hash = UpperToLower[p->zName[0]]*5 +
              UpperToLower[p->zName[p->len-1]]*3 + p->len;
    p->offset = nChar;
    if( i<NKEYWORD-1 && strncmp(p->zName, aKeywordTable[i+1].zName,p->len)==0 ){
      /* This entry is a prefix of the one that follows.  Do not advance
      ** the offset */
    }else{
      nChar += p->len;
    }
  }

  /* Figure out how big to make the hash table in order to minimize the
  ** number of collisions */
  bestSize = NKEYWORD;
  bestCount = NKEYWORD*NKEYWORD;
  for(i=NKEYWORD/2; i<=2*NKEYWORD; i++){
    for(j=0; j<i; j++) aHash[j] = 0;
    for(j=0; j<NKEYWORD; j++){
      h = aKeywordTable[j].hash % i;
      aHash[h] *= 2;
      aHash[h]++;
    }
    for(j=count=0; j<i; j++) count += aHash[j];
    if( count<bestCount ){
      bestCount = count;
      bestSize = i;
    }
  }

  /* Compute the hash */
  for(i=0; i<bestSize; i++) aHash[i] = 0;
  for(i=0; i<NKEYWORD; i++){
    h = aKeywordTable[i].hash % bestSize;
    aKeywordTable[i].iNext = aHash[h];
    aHash[h] = i+1;
  }

  /* Begin generating code */
  printf("int sqlite3KeywordCode(const char *z, int n){\n");

  printf("  static const char zText[%d] =\n", nChar+1);
  for(i=j=0; i<NKEYWORD; i++){
    Keyword *p = &aKeywordTable[i];
    if( i<NKEYWORD-1 && p->offset==aKeywordTable[i+1].offset ) continue;
    if( j==0 ) printf("    \"");
    printf("%s", p->zName);
    j += p->len;
    if( j>60 ){
      printf("\"\n");
      j = 0;
    }
  }
  printf("%s;\n", j>0 ? "\"" : "  ");

  printf("  static const unsigned char aHash[%d] = {\n", bestSize);
  for(i=j=0; i<bestSize; i++){
    if( j==0 ) printf("    ");
    printf(" %3d,", aHash[i]);
    j++;
    if( j>12 ){
      printf("\n");
      j = 0;
    }
  }
  printf("%s  };\n", j==0 ? "" : "\n");    

  printf("  static const unsigned char aNext[%d] = {\n", NKEYWORD);
  for(i=j=0; i<NKEYWORD; i++){
    if( j==0 ) printf("    ");
    printf(" %3d,", aKeywordTable[i].iNext);
    j++;
    if( j>12 ){
      printf("\n");
      j = 0;
    }
  }
  printf("%s  };\n", j==0 ? "" : "\n");    

  printf("  static const unsigned char aLen[%d] = {\n", NKEYWORD);
  for(i=j=0; i<NKEYWORD; i++){
    if( j==0 ) printf("    ");
    printf(" %3d,", aKeywordTable[i].len);
    j++;
    if( j>12 ){
      printf("\n");
      j = 0;
    }
  }
  printf("%s  };\n", j==0 ? "" : "\n");    

  printf("  static const unsigned short int aOffset[%d] = {\n", NKEYWORD);
  for(i=j=0; i<NKEYWORD; i++){
    if( j==0 ) printf("    ");
    printf(" %3d,", aKeywordTable[i].offset);
    j++;
    if( j>12 ){
      printf("\n");
      j = 0;
    }
  }
  printf("%s  };\n", j==0 ? "" : "\n");

  printf("  static const unsigned char aCode[%d] = {\n", NKEYWORD);
  for(i=j=0; i<NKEYWORD; i++){
    char *zToken = aKeywordTable[i].zTokenType;
    if( j==0 ) printf("    ");
    printf("%s,%*s", zToken, (int)(14-strlen(zToken)), "");
    j++;
    if( j>=5 ){
      printf("\n");
      j = 0;
    }
  }
  printf("%s  };\n", j==0 ? "" : "\n");

  printf("  int h, i;\n");
  printf("  if( n<2 ) return TK_ID;\n");
  printf("  h = (sqlite3UpperToLower[((unsigned char*)z)[0]]*5 + \n"
         "      sqlite3UpperToLower[((unsigned char*)z)[n-1]]*3 +\n"
         "      n) %% %d;\n", bestSize);
  printf("  for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){\n");
  printf("    if( aLen[i]==n &&"
                   " sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){\n");
  printf("      return aCode[i];\n");
  printf("    }\n");
  printf("  }\n");
  printf("  return TK_ID;\n");
  printf("}\n");

  return 0;
}

Changes to www/index.tcl.

18
19
20
21
22
23
24
25

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
..
76
77
78
79
80
81
82
83
    even after system crashes and power failures.
<li>Zero-configuration - no setup or administration needed.</li>
<li>Implements most of SQL92.
    (<a href="omitted.html">Features not supported</a>)</li>
<li>A complete database is stored in a single disk file.</li>
<li>Database files can be freely shared between machines with
    different byte orders.</li>
<li>Supports databases up to 2 terabytes (2<sup>41</sup> bytes) in size.</li>

<li>Sizes of strings and BLOBs limited only by available memory.</li>
<li>Small code footprint: less than 30K lines of C code,
    less than 250KB code space (gcc on i486)</li>
<li><a href="speed.html">Faster</a> than popular client/server database
    engines for most common operations.</li>
<li>Simple, easy to use <a href="c_interface.html">API</a>.</li>
<li><a href="tclsqlite.html">TCL bindings</a> included.
    Bindings for many other languages 
    <a href="http://www.sqlite.org/cvstrac/wiki?p=SqliteWrappers">
    available separately.</a></li>
<li>Well-commented source code with over 90% test coverage.</li>
<li>Self-contained: no external dependencies.</li>
<li>Sources are in the <a href="copyright.html">public domain</a>.
    Use for any purpose.</li>
</ul>
</p>

<p>
................................................................................
}
  

puts {
<p align="right"><a href="oldnews.html">Old news...</a></p>
</td></tr></table>
}
footer {$Id: index.tcl,v 1.97 2004/09/18 18:51:09 drh Exp $}







|
>










|







 







|
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
..
77
78
79
80
81
82
83
84
    even after system crashes and power failures.
<li>Zero-configuration - no setup or administration needed.</li>
<li>Implements most of SQL92.
    (<a href="omitted.html">Features not supported</a>)</li>
<li>A complete database is stored in a single disk file.</li>
<li>Database files can be freely shared between machines with
    different byte orders.</li>
<li>Supports databases up to 2 terabytes
    (2<sup><small>41</small></sup> bytes) in size.</li>
<li>Sizes of strings and BLOBs limited only by available memory.</li>
<li>Small code footprint: less than 30K lines of C code,
    less than 250KB code space (gcc on i486)</li>
<li><a href="speed.html">Faster</a> than popular client/server database
    engines for most common operations.</li>
<li>Simple, easy to use <a href="c_interface.html">API</a>.</li>
<li><a href="tclsqlite.html">TCL bindings</a> included.
    Bindings for many other languages 
    <a href="http://www.sqlite.org/cvstrac/wiki?p=SqliteWrappers">
    available separately.</a></li>
<li>Well-commented source code with over 95% test coverage.</li>
<li>Self-contained: no external dependencies.</li>
<li>Sources are in the <a href="copyright.html">public domain</a>.
    Use for any purpose.</li>
</ul>
</p>

<p>
................................................................................
}
  

puts {
<p align="right"><a href="oldnews.html">Old news...</a></p>
</td></tr></table>
}
footer {$Id: index.tcl,v 1.98 2004/10/07 19:03:02 drh Exp $}