/ Check-in [7b9886f8]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Tighter encoding of the keyword hash table in the tokenizer. (CVS 2028)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 7b9886f8d4db366bc7dbf25495f0d3b907d25689
User & Date: drh 2004-10-23 05:10:18
Context
2004-10-25
20:33
Minor optimizations in the pragma module. (CVS 2029) check-in: 63efd50a user: drh tags: trunk
2004-10-23
05:10
Tighter encoding of the keyword hash table in the tokenizer. (CVS 2028) check-in: 7b9886f8 user: drh tags: trunk
2004-10-22
20:29
Add the experimental and scary pragma "writable_schema". (CVS 2027) check-in: 39f7870a user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Show Whitespace Changes Patch

Changes to src/tokenize.c.

11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
..
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125

126
127
128
129
130
131
132
*************************************************************************
** An tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.c,v 1.91 2004/10/07 19:03:01 drh Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include <stdlib.h>

/*
................................................................................
** mkkeywordhash.c, located in the tool subdirectory of the distribution.
** The output of the mkkeywordhash.c program was manually cut and pasted
** into this file.  When the set of keywords for SQLite changes, you
** must modify the mkkeywordhash.c program (to add or remove keywords from
** the data tables) then rerun that program to regenerate this function.
*/
int sqlite3KeywordCode(const char *z, int n){
  static const char zText[519] =
    "ABORTAFTERALLANDASCATTACHBEFOREBEGINBETWEENBYCASCADECASECHECK"
    "COLLATECOMMITCONFLICTCONSTRAINTCREATECROSSDATABASEDEFAULTDEFERRABLE"
    "DEFERREDDELETEDESCDETACHDISTINCTDROPEACHELSEENDEXCEPTEXCLUSIVE"
    "EXPLAINFAILFOREIGNFROMFULLGLOBGROUPHAVINGIGNOREIMMEDIATEINDEX"
    "INITIALLYINNERINSERTINSTEADINTERSECTINTOISNULLJOINKEYLEFTLIKE"
    "LIMITMATCHNATURALNOTNULLNULLOFFSETONORDEROUTERPRAGMAPRIMARYRAISE"
    "REFERENCESREPLACERESTRICTRIGHTROLLBACKROWSELECTSETSTATEMENTTABLE"
    "TEMPORARYTHENTRANSACTIONTRIGGERUNIONUNIQUEUPDATEUSINGVACUUMVALUES"
    "VIEWWHENWHERE";
  static const unsigned char aHash[154] = {
       0,  75,  82,   0,   0,  97,  80,   0,  83,   0,   0,   0,   0,
       0,   0,   6,   0,  95,   4,   0,   0,   0,   0,   0,   0,   0,
       0,  96,  86,   8,   0,  26,  13,   7,  19,  15,   0,   0,  32,
      25,   0,  21,  31,  41,   0,   0,   0,  34,  27,   0,   0,  30,
       0,   0,   0,   9,   0,  10,   0,   0,   0,   0,  51,   0,  44,
      43,   0,  45,  40,   0,  29,  39,  35,   0,   0,  20,   0,  59,
       0,  16,   0,  17,   0,  18,   0,  55,  42,  72,   0,  33,   0,
       0,  61,  66,  56,   0,   0,   0,   0,   0,   0,   0,  54,   0,
       0,   0,   0,   0,  74,  50,  76,  64,  52,   0,   0,   0,   0,
      68,  84,   0,  47,   0,  58,  60,  92,   0,   0,  48,   0,  93,
       0,  63,  71,  98,   0,   0,   0,   0,   0,  67,   0,   0,   0,
       0,  87,   0,   0,   0,   0,   0,  90,  88,   0,  94,
  };
  static const unsigned char aNext[98] = {
       0,   0,   0,   0,   2,   0,   0,   0,   0,   0,   0,   0,   0,
       0,  12,   0,   0,   0,   0,   0,   0,  11,   0,   0,   0,   0,
       0,   0,   0,  14,   3,  24,   0,   0,   0,   1,  22,   0,   0,
      36,  23,  28,   0,   0,   0,   0,   0,   0,   0,   0,   5,   0,
       0,  49,  37,   0,   0,   0,  38,   0,  53,   0,  57,  62,   0,
       0,   0,   0,   0,   0,  70,  46,   0,  65,   0,   0,   0,   0,
      69,  73,   0,  77,   0,   0,   0,   0,   0,   0,  81,  85,   0,
      91,  79,  78,   0,   0,  89,   0,
  };
  static const unsigned char aLen[98] = {
       5,   5,   3,   3,   2,   3,   6,   6,   5,   7,   2,   7,   4,
       5,   7,   6,   8,  10,   6,   5,   8,   7,  10,   8,   6,   4,
       6,   8,   4,   4,   4,   3,   6,   9,   7,   4,   3,   7,   4,
       4,   4,   5,   6,   6,   9,   2,   5,   9,   5,   6,   7,   9,
       4,   2,   6,   4,   3,   4,   4,   5,   5,   7,   3,   7,   4,
       2,   6,   2,   2,   5,   5,   6,   7,   5,  10,   7,   8,   5,
       8,   3,   6,   3,   9,   5,   4,   9,   4,  11,   7,   5,   6,
       6,   5,   6,   6,   4,   4,   5,
  };
  static const unsigned short int aOffset[98] = {
       0,   5,  10,  13,  16,  16,  19,  25,  31,  36,  43,  45,  52,
      56,  61,  68,  74,  82,  92,  98, 103, 111, 118, 128, 136, 142,
     146, 152, 160, 164, 168, 172, 175, 181, 190, 197, 201, 201, 208,
     212, 216, 220, 225, 231, 237, 246, 246, 251, 260, 265, 271, 278,
     287, 291, 291, 297, 301, 304, 308, 312, 317, 322, 329, 329, 336,
     340, 340, 346, 348, 348, 353, 358, 364, 371, 376, 386, 393, 401,
     406, 414, 417, 423, 426, 435, 440, 440, 449, 453, 464, 471, 476,
     482, 488, 493, 499, 505, 509, 513,
  };
  static const unsigned char aCode[98] = {
    TK_ABORT,      TK_AFTER,      TK_ALL,        TK_AND,        TK_AS,         
    TK_ASC,        TK_ATTACH,     TK_BEFORE,     TK_BEGIN,      TK_BETWEEN,    
    TK_BY,         TK_CASCADE,    TK_CASE,       TK_CHECK,      TK_COLLATE,    
    TK_COMMIT,     TK_CONFLICT,   TK_CONSTRAINT, TK_CREATE,     TK_JOIN_KW,    
    TK_DATABASE,   TK_DEFAULT,    TK_DEFERRABLE, TK_DEFERRED,   TK_DELETE,     
    TK_DESC,       TK_DETACH,     TK_DISTINCT,   TK_DROP,       TK_EACH,       
    TK_ELSE,       TK_END,        TK_EXCEPT,     TK_EXCLUSIVE,  TK_EXPLAIN,    
    TK_FAIL,       TK_FOR,        TK_FOREIGN,    TK_FROM,       TK_JOIN_KW,    
    TK_GLOB,       TK_GROUP,      TK_HAVING,     TK_IGNORE,     TK_IMMEDIATE,  
    TK_IN,         TK_INDEX,      TK_INITIALLY,  TK_JOIN_KW,    TK_INSERT,     
    TK_INSTEAD,    TK_INTERSECT,  TK_INTO,       TK_IS,         TK_ISNULL,     
    TK_JOIN,       TK_KEY,        TK_JOIN_KW,    TK_LIKE,       TK_LIMIT,      
    TK_MATCH,      TK_JOIN_KW,    TK_NOT,        TK_NOTNULL,    TK_NULL,       
    TK_OF,         TK_OFFSET,     TK_ON,         TK_OR,         TK_ORDER,      
    TK_JOIN_KW,    TK_PRAGMA,     TK_PRIMARY,    TK_RAISE,      TK_REFERENCES, 
    TK_REPLACE,    TK_RESTRICT,   TK_JOIN_KW,    TK_ROLLBACK,   TK_ROW,        
    TK_SELECT,     TK_SET,        TK_STATEMENT,  TK_TABLE,      TK_TEMP,       
    TK_TEMP,       TK_THEN,       TK_TRANSACTION,TK_TRIGGER,    TK_UNION,      
    TK_UNIQUE,     TK_UPDATE,     TK_USING,      TK_VACUUM,     TK_VALUES,     
    TK_VIEW,       TK_WHEN,       TK_WHERE,      
  };
  int h, i;
  if( n<2 ) return TK_ID;
  h = (sqlite3UpperToLower[((unsigned char*)z)[0]]*5 + 
      sqlite3UpperToLower[((unsigned char*)z)[n-1]]*3 +
      n) % 154;
  for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){
    if( aLen[i]==n && sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){
      return aCode[i];
    }
  }
  return TK_ID;
}


/*
** If X is a character that can be used in an identifier and
** X&0x80==0 then isIdChar[X] will be 1.  If X&0x80==0x80 then
** X is always an identifier character.  (Hence all UTF-8
** characters can be part of an identifier).  isIdChar[X] will
** be 0 for every character in the lower 128 ASCII characters







|







 







|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<
<













>







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
..
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110


111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
*************************************************************************
** An tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.c,v 1.92 2004/10/23 05:10:18 drh Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include <stdlib.h>

/*
................................................................................
** mkkeywordhash.c, located in the tool subdirectory of the distribution.
** The output of the mkkeywordhash.c program was manually cut and pasted
** into this file.  When the set of keywords for SQLite changes, you
** must modify the mkkeywordhash.c program (to add or remove keywords from
** the data tables) then rerun that program to regenerate this function.
*/
int sqlite3KeywordCode(const char *z, int n){
  static const char zText[443] =
    "ABORTABLEFTEMPORARYAFTERAISELECTHENDATABASEACHECKEYANDEFAULTRANSACTION"
    "ATURALIKELSEXCEPTRIGGEREFERENCESTATEMENTATTACHAVINGLOBEFOREIGN"
    "OREPLACEXCLUSIVEXPLAINDEXBEGINITIALLYBETWEENOTNULLIMITBYCASCADE"
    "FERRABLECASECOLLATECOMMITCONFLICTCONSTRAINTERSECTCREATECROSSDEFERRED"
    "ELETEDESCDETACHDISTINCTDROPRAGMATCHFAILFROMFULLGROUPDATEIMMEDIATE"
    "INNERESTRICTINSERTINSTEADINTOFFSETISNULLJOINORDERIGHTOUTEROLLBACK"
    "PRIMARYROWHENUNIONUNIQUEUSINGVACUUMVALUESVIEWHERE";
  static const unsigned char aHash[154] = {
       0,  26,  82,   0,   0,  91,  90,   0,  27,   0,   0,   0,   0,
       0,   0,  49,   0,  96,  17,   0,   0,   0,   0,   0,   0,   0,
       0,  97,   5,  31,   0,  62,  51,  28,  58,  52,   0,   0,  60,
      61,   0,  12,  41,  50,   0,   0,   0,  36,  63,   0,   0,  15,
       0,   0,   0,  39,   0,  42,   0,   0,   0,   0,  78,   0,  34,
      29,   0,  74,  71,   0,  66,  70,  37,   0,   0,  59,   0,  33,
       0,  53,   0,  54,   0,  55,   0,  83,  72,  67,   0,  24,   0,
       0,  79,  80,  84,   0,   0,   0,   0,   0,   0,   0,  75,   0,
       0,   0,   0,   0,  45,  77,  35,  44,  57,   0,   0,   0,   0,
      20,   2,   0,  38,   0,   3,  46,  93,   0,   0,  40,   0,  94,
       0,  43,  87,  98,   0,   0,   0,   0,   0,  81,   0,   0,   0,
       0,  10,   0,   0,   0,   0,   0,  92,  19,   0,  95,
  };
  static const unsigned char aNext[98] = {
       0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   7,
       0,  14,   0,   0,   0,   0,   0,   0,   0,   0,   0,   9,   0,
       0,   0,   0,   0,   0,  18,  22,   0,   0,   0,   0,   0,   0,
       0,  23,   0,  16,  21,   8,   0,  32,   0,   0,  30,   0,  48,
       0,   0,   0,   0,   0,   0,   0,  11,   0,   0,   0,   0,   0,
       0,  56,   0,   1,   0,  69,  64,   0,   0,  65,   0,   0,  13,
      68,   0,   0,  76,  47,   0,   0,   0,  85,   6,   0,  89,  25,
       4,  73,  88,  86,   0,   0,   0,
  };
  static const unsigned char aLen[98] = {
       5,   5,   4,   4,   9,   2,   5,   5,   6,   4,   3,   8,   2,
       4,   5,   3,   3,   7,  11,   2,   7,   4,   4,   6,   7,  10,
       9,   6,   6,   4,   6,   3,   7,   6,   7,   9,   7,   5,   5,
       9,   3,   7,   3,   7,   4,   5,   2,   7,   3,  10,   4,   7,
       6,   8,  10,   2,   9,   6,   5,   8,   6,   4,   6,   8,   2,
       4,   6,   5,   4,   4,   4,   5,   6,   9,   5,   8,   6,   7,
       4,   2,   6,   3,   6,   4,   5,   5,   5,   8,   7,   3,   4,
       5,   6,   5,   6,   6,   4,   5,
  };
  static const unsigned short int aOffset[98] = {
       0,   4,   7,  10,  10,  14,  19,  23,  26,  31,  33,  35,  40,
      42,  44,  48,  51,  53,  59,  68,  69,  75,  78,  81,  86,  92,
     101, 110, 115, 120, 123, 125, 125, 129, 133, 139, 147, 152, 157,
     160, 165, 169, 175, 175, 178, 181, 186, 188, 189, 193, 203, 207,
     214, 220, 228, 235, 235, 244, 250, 255, 262, 268, 272, 278, 279,
     286, 289, 293, 298, 302, 306, 310, 313, 319, 328, 332, 340, 346,
     353, 356, 356, 359, 362, 368, 372, 376, 381, 385, 393, 400, 402,
     406, 411, 417, 422, 428, 434, 437,
  };
  static const unsigned char aCode[98] = {
    TK_ABORT,      TK_TABLE,      TK_JOIN_KW,    TK_TEMP,       TK_TEMP,       
    TK_OR,         TK_AFTER,      TK_RAISE,      TK_SELECT,     TK_THEN,       
    TK_END,        TK_DATABASE,   TK_AS,         TK_EACH,       TK_CHECK,      
    TK_KEY,        TK_AND,        TK_DEFAULT,    TK_TRANSACTION,TK_ON,         
    TK_JOIN_KW,    TK_LIKE,       TK_ELSE,       TK_EXCEPT,     TK_TRIGGER,    
    TK_REFERENCES, TK_STATEMENT,  TK_ATTACH,     TK_HAVING,     TK_GLOB,       
    TK_BEFORE,     TK_FOR,        TK_FOREIGN,    TK_IGNORE,     TK_REPLACE,    
    TK_EXCLUSIVE,  TK_EXPLAIN,    TK_INDEX,      TK_BEGIN,      TK_INITIALLY,  
    TK_ALL,        TK_BETWEEN,    TK_NOT,        TK_NOTNULL,    TK_NULL,       
    TK_LIMIT,      TK_BY,         TK_CASCADE,    TK_ASC,        TK_DEFERRABLE, 
    TK_CASE,       TK_COLLATE,    TK_COMMIT,     TK_CONFLICT,   TK_CONSTRAINT, 
    TK_IN,         TK_INTERSECT,  TK_CREATE,     TK_JOIN_KW,    TK_DEFERRED,   
    TK_DELETE,     TK_DESC,       TK_DETACH,     TK_DISTINCT,   TK_IS,         
    TK_DROP,       TK_PRAGMA,     TK_MATCH,      TK_FAIL,       TK_FROM,       
    TK_JOIN_KW,    TK_GROUP,      TK_UPDATE,     TK_IMMEDIATE,  TK_JOIN_KW,    
    TK_RESTRICT,   TK_INSERT,     TK_INSTEAD,    TK_INTO,       TK_OF,         
    TK_OFFSET,     TK_SET,        TK_ISNULL,     TK_JOIN,       TK_ORDER,      
    TK_JOIN_KW,    TK_JOIN_KW,    TK_ROLLBACK,   TK_PRIMARY,    TK_ROW,        
    TK_WHEN,       TK_UNION,      TK_UNIQUE,     TK_USING,      TK_VACUUM,     
    TK_VALUES,     TK_VIEW,       TK_WHERE,      


  };
  int h, i;
  if( n<2 ) return TK_ID;
  h = (sqlite3UpperToLower[((unsigned char*)z)[0]]*5 + 
      sqlite3UpperToLower[((unsigned char*)z)[n-1]]*3 +
      n) % 154;
  for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){
    if( aLen[i]==n && sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){
      return aCode[i];
    }
  }
  return TK_ID;
}


/*
** If X is a character that can be used in an identifier and
** X&0x80==0 then isIdChar[X] will be 1.  If X&0x80==0x80 then
** X is always an identifier character.  (Hence all UTF-8
** characters can be part of an identifier).  isIdChar[X] will
** be 0 for every character in the lower 128 ASCII characters

Changes to tool/mkkeywordhash.c.

11
12
13
14
15
16
17

18
19
20

21


22
23
24
25
26
27
28
...
149
150
151
152
153
154
155
156
157
158


159

























160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183































184
185
186








187
188





189
190
191











192
193
194
195
196
197
198
...
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
...
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
** All the keywords of the SQL language are stored as in a hash
** table composed of instances of the following structure.
*/
typedef struct Keyword Keyword;
struct Keyword {
  char *zName;         /* The keyword name */
  char *zTokenType;    /* Token value for this keyword */

  int hash;            /* Hash on the keyword */
  int offset;          /* Offset to start of name string */
  int len;             /* Length of this keyword, not counting final \000 */

  int iNext;           /* Index in aKeywordTable[] of next with same hash */


};

/*
** These are the keywords
*/
static Keyword aKeywordTable[] = {
  { "ABORT",            "TK_ABORT",        },
................................................................................
    252,253,254,255
};
#define UpperToLower sqlite3UpperToLower

/*
** Comparision function for two Keyword records
*/
static int keywordCompare(const void *a, const void *b){
  const Keyword *pA = (Keyword*)a;
  const Keyword *pB = (Keyword*)b;


  return strcmp(pA->zName, pB->zName);

























}

/*
** This routine does the work.  The generated code is printed on standard
** output.
*/
int main(int argc, char **argv){
  int i, j, h;
  int bestSize, bestCount;
  int count;
  int nChar;
  int aHash[1000];  /* 1000 is much bigger than NKEYWORD */

  /* Make sure the table is sorted */
  qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare);

  /* Fill in the hash value, length, and offset for all entries */
  nChar = 0;
  for(i=0; i<NKEYWORD; i++){
    Keyword *p = &aKeywordTable[i];
    p->len = strlen(p->zName);
    /* p->hash = sqlite3HashNoCase(p->zName, p->len); */
    p->hash = UpperToLower[p->zName[0]]*5 +
              UpperToLower[p->zName[p->len-1]]*3 + p->len;































    p->offset = nChar;
    if( i<NKEYWORD-1 && strncmp(p->zName, aKeywordTable[i+1].zName,p->len)==0 ){
      /* This entry is a prefix of the one that follows.  Do not advance








      ** the offset */
    }else{





      nChar += p->len;
    }
  }












  /* Figure out how big to make the hash table in order to minimize the
  ** number of collisions */
  bestSize = NKEYWORD;
  bestCount = NKEYWORD*NKEYWORD;
  for(i=NKEYWORD/2; i<=2*NKEYWORD; i++){
    for(j=0; j<i; j++) aHash[j] = 0;
................................................................................

  /* Begin generating code */
  printf("int sqlite3KeywordCode(const char *z, int n){\n");

  printf("  static const char zText[%d] =\n", nChar+1);
  for(i=j=0; i<NKEYWORD; i++){
    Keyword *p = &aKeywordTable[i];
    if( i<NKEYWORD-1 && p->offset==aKeywordTable[i+1].offset ) continue;
    if( j==0 ) printf("    \"");
    printf("%s", p->zName);
    j += p->len;
    if( j>60 ){
      printf("\"\n");
      j = 0;
    }
................................................................................
    }
  }
  printf("%s  };\n", j==0 ? "" : "\n");    

  printf("  static const unsigned char aLen[%d] = {\n", NKEYWORD);
  for(i=j=0; i<NKEYWORD; i++){
    if( j==0 ) printf("    ");
    printf(" %3d,", aKeywordTable[i].len);
    j++;
    if( j>12 ){
      printf("\n");
      j = 0;
    }
  }
  printf("%s  };\n", j==0 ? "" : "\n");    







>



>

>
>







 







|


>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







|





|
<
<
<
<



<


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

<
<
>
>
>
>
>
>
>
>
|
<
>
>
>
>
>
|
|
|
>
>
>
>
>
>
>
>
>
>
>







 







|







 







|







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
...
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204




205
206
207

208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241


242
243
244
245
246
247
248
249
250

251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
...
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
...
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
** All the keywords of the SQL language are stored as in a hash
** table composed of instances of the following structure.
*/
typedef struct Keyword Keyword;
struct Keyword {
  char *zName;         /* The keyword name */
  char *zTokenType;    /* Token value for this keyword */
  int id;              /* Unique ID for this record */
  int hash;            /* Hash on the keyword */
  int offset;          /* Offset to start of name string */
  int len;             /* Length of this keyword, not counting final \000 */
  int prefix;          /* Number of characters in prefix */
  int iNext;           /* Index in aKeywordTable[] of next with same hash */
  int substrId;        /* Id to another keyword this keyword is embedded in */
  int substrOffset;    /* Offset into substrId for start of this keyword */
};

/*
** These are the keywords
*/
static Keyword aKeywordTable[] = {
  { "ABORT",            "TK_ABORT",        },
................................................................................
    252,253,254,255
};
#define UpperToLower sqlite3UpperToLower

/*
** Comparision function for two Keyword records
*/
static int keywordCompare1(const void *a, const void *b){
  const Keyword *pA = (Keyword*)a;
  const Keyword *pB = (Keyword*)b;
  int n = pA->len - pB->len;
  if( n==0 ){
    n = strcmp(pA->zName, pB->zName);
  }
  return n;
}
static int keywordCompare2(const void *a, const void *b){
  const Keyword *pA = (Keyword*)a;
  const Keyword *pB = (Keyword*)b;
  int n = strcmp(pA->zName, pB->zName);
  return n;
}
static int keywordCompare3(const void *a, const void *b){
  const Keyword *pA = (Keyword*)a;
  const Keyword *pB = (Keyword*)b;
  int n = pA->offset - pB->offset;
  return n;
}

/*
** Return a KeywordTable entry with the given id
*/
static Keyword *findById(int id){
  int i;
  for(i=0; i<NKEYWORD; i++){
    if( aKeywordTable[i].id==id ) break;
  }
  return &aKeywordTable[i];
}

/*
** This routine does the work.  The generated code is printed on standard
** output.
*/
int main(int argc, char **argv){
  int i, j, k, h;
  int bestSize, bestCount;
  int count;
  int nChar;
  int aHash[1000];  /* 1000 is much bigger than NKEYWORD */

  /* Fill in the lengths of strings and hashes for all entries. */




  for(i=0; i<NKEYWORD; i++){
    Keyword *p = &aKeywordTable[i];
    p->len = strlen(p->zName);

    p->hash = UpperToLower[p->zName[0]]*5 +
              UpperToLower[p->zName[p->len-1]]*3 + p->len;
    p->id = i+1;
  }

  /* Sort the table from shortest to longest keyword */
  qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare1);

  /* Look for short keywords embedded in longer keywords */
  for(i=NKEYWORD-2; i>=0; i--){
    Keyword *p = &aKeywordTable[i];
    for(j=NKEYWORD-1; j>i && p->substrId==0; j--){
      Keyword *pOther = &aKeywordTable[j];
      if( pOther->substrId ) continue;
      if( pOther->len<=p->len ) continue;
      for(k=0; k<=pOther->len-p->len; k++){
        if( memcmp(p->zName, &pOther->zName[k], p->len)==0 ){
          p->substrId = pOther->id;
          p->substrOffset = k;
          break;
        }
      }
    }
  }

  /* Sort the table into alphabetical order */
  qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare2);

  /* Fill in the offset for all entries */
  nChar = 0;
  for(i=0; i<NKEYWORD; i++){
    Keyword *p = &aKeywordTable[i];
    if( p->offset>0 || p->substrId ) continue;
    p->offset = nChar;


    nChar += p->len;
    for(k=p->len-1; k>=1; k--){
      for(j=i+1; j<NKEYWORD; j++){
        Keyword *pOther = &aKeywordTable[j];
        if( pOther->offset>0 || pOther->substrId ) continue;
        if( pOther->len<=k ) continue;
        if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){
          p = pOther;
          p->offset = nChar - k;

          nChar = p->offset + p->len;
          p->zName += k;
          p->len -= k;
          p->prefix = k;
          j = i;
          k = p->len;
        }
      }
    }
  }
  for(i=0; i<NKEYWORD; i++){
    Keyword *p = &aKeywordTable[i];
    if( p->substrId ){
      p->offset = findById(p->substrId)->offset + p->substrOffset;
    }
  }

  /* Sort the table by offset */
  qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare3);

  /* Figure out how big to make the hash table in order to minimize the
  ** number of collisions */
  bestSize = NKEYWORD;
  bestCount = NKEYWORD*NKEYWORD;
  for(i=NKEYWORD/2; i<=2*NKEYWORD; i++){
    for(j=0; j<i; j++) aHash[j] = 0;
................................................................................

  /* Begin generating code */
  printf("int sqlite3KeywordCode(const char *z, int n){\n");

  printf("  static const char zText[%d] =\n", nChar+1);
  for(i=j=0; i<NKEYWORD; i++){
    Keyword *p = &aKeywordTable[i];
    if( p->substrId ) continue;
    if( j==0 ) printf("    \"");
    printf("%s", p->zName);
    j += p->len;
    if( j>60 ){
      printf("\"\n");
      j = 0;
    }
................................................................................
    }
  }
  printf("%s  };\n", j==0 ? "" : "\n");    

  printf("  static const unsigned char aLen[%d] = {\n", NKEYWORD);
  for(i=j=0; i<NKEYWORD; i++){
    if( j==0 ) printf("    ");
    printf(" %3d,", aKeywordTable[i].len+aKeywordTable[i].prefix);
    j++;
    if( j>12 ){
      printf("\n");
      j = 0;
    }
  }
  printf("%s  };\n", j==0 ? "" : "\n");