Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Tighter encoding of the keyword hash table in the tokenizer. (CVS 2028) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
7b9886f8d4db366bc7dbf25495f0d3b9 |
User & Date: | drh 2004-10-23 05:10:18.000 |
Context
2004-10-25
| ||
20:33 | Minor optimizations in the pragma module. (CVS 2029) (check-in: 63efd50a16 user: drh tags: trunk) | |
2004-10-23
| ||
05:10 | Tighter encoding of the keyword hash table in the tokenizer. (CVS 2028) (check-in: 7b9886f8d4 user: drh tags: trunk) | |
2004-10-22
| ||
20:29 | Add the experimental and scary pragma "writable_schema". (CVS 2027) (check-in: 39f7870a54 user: drh tags: trunk) | |
Changes
Changes to src/tokenize.c.
︙ | ︙ | |||
11 12 13 14 15 16 17 | ************************************************************************* ** An tokenizer for SQL ** ** This file contains C code that splits an SQL input string up into ** individual tokens and sends those tokens one-by-one over to the ** parser for analysis. ** | | | | > > | | < < < | | < | | < | | | > | | | | | | | < < | | | | > | > > | | | > | | < < | | | | > | < | | | > > > | > > | > > | | | | < | | | < < < | < | < < | | > | 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | ************************************************************************* ** An tokenizer for SQL ** ** This file contains C code that splits an SQL input string up into ** individual tokens and sends those tokens one-by-one over to the ** parser for analysis. ** ** $Id: tokenize.c,v 1.92 2004/10/23 05:10:18 drh Exp $ */ #include "sqliteInt.h" #include "os.h" #include <ctype.h> #include <stdlib.h> /* ** This function looks up an identifier to determine if it is a ** keyword. If it is a keyword, the token code of that keyword is ** returned. If the input is not a keyword, TK_ID is returned. ** ** The implementation of this routine was generated by a program, ** mkkeywordhash.c, located in the tool subdirectory of the distribution. ** The output of the mkkeywordhash.c program was manually cut and pasted ** into this file. When the set of keywords for SQLite changes, you ** must modify the mkkeywordhash.c program (to add or remove keywords from ** the data tables) then rerun that program to regenerate this function. */ int sqlite3KeywordCode(const char *z, int n){ static const char zText[443] = "ABORTABLEFTEMPORARYAFTERAISELECTHENDATABASEACHECKEYANDEFAULTRANSACTION" "ATURALIKELSEXCEPTRIGGEREFERENCESTATEMENTATTACHAVINGLOBEFOREIGN" "OREPLACEXCLUSIVEXPLAINDEXBEGINITIALLYBETWEENOTNULLIMITBYCASCADE" "FERRABLECASECOLLATECOMMITCONFLICTCONSTRAINTERSECTCREATECROSSDEFERRED" "ELETEDESCDETACHDISTINCTDROPRAGMATCHFAILFROMFULLGROUPDATEIMMEDIATE" "INNERESTRICTINSERTINSTEADINTOFFSETISNULLJOINORDERIGHTOUTEROLLBACK" "PRIMARYROWHENUNIONUNIQUEUSINGVACUUMVALUESVIEWHERE"; static const unsigned char aHash[154] = { 0, 26, 82, 0, 0, 91, 90, 0, 27, 0, 0, 0, 0, 0, 0, 49, 0, 96, 17, 0, 0, 0, 0, 0, 0, 0, 0, 97, 5, 31, 0, 62, 51, 28, 58, 52, 0, 0, 60, 61, 0, 12, 41, 50, 0, 0, 0, 36, 63, 0, 0, 15, 0, 0, 0, 39, 0, 42, 0, 0, 0, 0, 78, 0, 34, 29, 0, 74, 71, 0, 66, 70, 37, 0, 0, 59, 0, 33, 0, 53, 0, 54, 0, 55, 0, 83, 72, 67, 0, 24, 0, 0, 79, 80, 84, 0, 0, 0, 0, 0, 0, 0, 75, 0, 0, 0, 0, 0, 45, 77, 35, 44, 57, 0, 0, 0, 0, 20, 2, 0, 38, 0, 3, 46, 93, 0, 0, 40, 0, 94, 0, 43, 87, 98, 0, 0, 0, 0, 0, 81, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 92, 19, 0, 95, }; static const unsigned char aNext[98] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 18, 22, 0, 0, 0, 0, 0, 0, 0, 23, 0, 16, 21, 8, 0, 32, 0, 0, 30, 0, 48, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 56, 0, 1, 0, 69, 64, 0, 0, 65, 0, 0, 13, 68, 0, 0, 76, 47, 0, 0, 0, 85, 6, 0, 89, 25, 4, 73, 88, 86, 0, 0, 0, }; static const unsigned char aLen[98] = { 5, 5, 4, 4, 9, 2, 5, 5, 6, 4, 3, 8, 2, 4, 5, 3, 3, 7, 11, 2, 7, 4, 4, 6, 7, 10, 9, 6, 6, 4, 6, 3, 7, 6, 7, 9, 7, 5, 5, 9, 3, 7, 3, 7, 4, 5, 2, 7, 3, 10, 4, 7, 6, 8, 10, 2, 9, 6, 5, 8, 6, 4, 6, 8, 2, 4, 6, 5, 4, 4, 4, 5, 6, 9, 5, 8, 6, 7, 4, 2, 6, 3, 6, 4, 5, 5, 5, 8, 7, 3, 4, 5, 6, 5, 6, 6, 4, 5, }; static const unsigned short int aOffset[98] = { 0, 4, 7, 10, 10, 14, 19, 23, 26, 31, 33, 35, 40, 42, 44, 48, 51, 53, 59, 68, 69, 75, 78, 81, 86, 92, 101, 110, 115, 120, 123, 125, 125, 129, 133, 139, 147, 152, 157, 160, 165, 169, 175, 175, 178, 181, 186, 188, 189, 193, 203, 207, 214, 220, 228, 235, 235, 244, 250, 255, 262, 268, 272, 278, 279, 286, 289, 293, 298, 302, 306, 310, 313, 319, 328, 332, 340, 346, 353, 356, 356, 359, 362, 368, 372, 376, 381, 385, 393, 400, 402, 406, 411, 417, 422, 428, 434, 437, }; static const unsigned char aCode[98] = { TK_ABORT, TK_TABLE, TK_JOIN_KW, TK_TEMP, TK_TEMP, TK_OR, TK_AFTER, TK_RAISE, TK_SELECT, TK_THEN, TK_END, TK_DATABASE, TK_AS, TK_EACH, TK_CHECK, TK_KEY, TK_AND, TK_DEFAULT, TK_TRANSACTION,TK_ON, TK_JOIN_KW, TK_LIKE, TK_ELSE, TK_EXCEPT, TK_TRIGGER, TK_REFERENCES, TK_STATEMENT, TK_ATTACH, TK_HAVING, TK_GLOB, TK_BEFORE, TK_FOR, TK_FOREIGN, TK_IGNORE, TK_REPLACE, TK_EXCLUSIVE, TK_EXPLAIN, TK_INDEX, TK_BEGIN, TK_INITIALLY, TK_ALL, TK_BETWEEN, TK_NOT, TK_NOTNULL, TK_NULL, TK_LIMIT, TK_BY, TK_CASCADE, TK_ASC, TK_DEFERRABLE, TK_CASE, TK_COLLATE, TK_COMMIT, TK_CONFLICT, TK_CONSTRAINT, TK_IN, TK_INTERSECT, TK_CREATE, TK_JOIN_KW, TK_DEFERRED, TK_DELETE, TK_DESC, TK_DETACH, TK_DISTINCT, TK_IS, TK_DROP, TK_PRAGMA, TK_MATCH, TK_FAIL, TK_FROM, TK_JOIN_KW, TK_GROUP, TK_UPDATE, TK_IMMEDIATE, TK_JOIN_KW, TK_RESTRICT, TK_INSERT, TK_INSTEAD, TK_INTO, TK_OF, TK_OFFSET, TK_SET, TK_ISNULL, TK_JOIN, TK_ORDER, TK_JOIN_KW, TK_JOIN_KW, TK_ROLLBACK, TK_PRIMARY, TK_ROW, TK_WHEN, TK_UNION, TK_UNIQUE, TK_USING, TK_VACUUM, TK_VALUES, TK_VIEW, TK_WHERE, }; int h, i; if( n<2 ) return TK_ID; h = (sqlite3UpperToLower[((unsigned char*)z)[0]]*5 + sqlite3UpperToLower[((unsigned char*)z)[n-1]]*3 + n) % 154; for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){ if( aLen[i]==n && sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){ return aCode[i]; } } return TK_ID; } /* ** If X is a character that can be used in an identifier and ** X&0x80==0 then isIdChar[X] will be 1. If X&0x80==0x80 then ** X is always an identifier character. (Hence all UTF-8 ** characters can be part of an identifier). isIdChar[X] will ** be 0 for every character in the lower 128 ASCII characters |
︙ | ︙ |
Changes to tool/mkkeywordhash.c.
︙ | ︙ | |||
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 | ** All the keywords of the SQL language are stored as in a hash ** table composed of instances of the following structure. */ typedef struct Keyword Keyword; struct Keyword { char *zName; /* The keyword name */ char *zTokenType; /* Token value for this keyword */ int hash; /* Hash on the keyword */ int offset; /* Offset to start of name string */ int len; /* Length of this keyword, not counting final \000 */ int iNext; /* Index in aKeywordTable[] of next with same hash */ }; /* ** These are the keywords */ static Keyword aKeywordTable[] = { { "ABORT", "TK_ABORT", }, | > > > > | 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 | ** All the keywords of the SQL language are stored as in a hash ** table composed of instances of the following structure. */ typedef struct Keyword Keyword; struct Keyword { char *zName; /* The keyword name */ char *zTokenType; /* Token value for this keyword */ int id; /* Unique ID for this record */ int hash; /* Hash on the keyword */ int offset; /* Offset to start of name string */ int len; /* Length of this keyword, not counting final \000 */ int prefix; /* Number of characters in prefix */ int iNext; /* Index in aKeywordTable[] of next with same hash */ int substrId; /* Id to another keyword this keyword is embedded in */ int substrOffset; /* Offset into substrId for start of this keyword */ }; /* ** These are the keywords */ static Keyword aKeywordTable[] = { { "ABORT", "TK_ABORT", }, |
︙ | ︙ | |||
149 150 151 152 153 154 155 | 252,253,254,255 }; #define UpperToLower sqlite3UpperToLower /* ** Comparision function for two Keyword records */ | | > > | > > > > > > > > > > > > > > > > > > > > > > > > > | < < | < < < > > > > > > > > > > > > > > > > | > > > > > | > > | | > > > > | | > > > > > > > > > > > > > > | | | > > > > > > > > > > > | 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 | 252,253,254,255 }; #define UpperToLower sqlite3UpperToLower /* ** Comparision function for two Keyword records */ static int keywordCompare1(const void *a, const void *b){ const Keyword *pA = (Keyword*)a; const Keyword *pB = (Keyword*)b; int n = pA->len - pB->len; if( n==0 ){ n = strcmp(pA->zName, pB->zName); } return n; } static int keywordCompare2(const void *a, const void *b){ const Keyword *pA = (Keyword*)a; const Keyword *pB = (Keyword*)b; int n = strcmp(pA->zName, pB->zName); return n; } static int keywordCompare3(const void *a, const void *b){ const Keyword *pA = (Keyword*)a; const Keyword *pB = (Keyword*)b; int n = pA->offset - pB->offset; return n; } /* ** Return a KeywordTable entry with the given id */ static Keyword *findById(int id){ int i; for(i=0; i<NKEYWORD; i++){ if( aKeywordTable[i].id==id ) break; } return &aKeywordTable[i]; } /* ** This routine does the work. The generated code is printed on standard ** output. */ int main(int argc, char **argv){ int i, j, k, h; int bestSize, bestCount; int count; int nChar; int aHash[1000]; /* 1000 is much bigger than NKEYWORD */ /* Fill in the lengths of strings and hashes for all entries. */ for(i=0; i<NKEYWORD; i++){ Keyword *p = &aKeywordTable[i]; p->len = strlen(p->zName); p->hash = UpperToLower[p->zName[0]]*5 + UpperToLower[p->zName[p->len-1]]*3 + p->len; p->id = i+1; } /* Sort the table from shortest to longest keyword */ qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare1); /* Look for short keywords embedded in longer keywords */ for(i=NKEYWORD-2; i>=0; i--){ Keyword *p = &aKeywordTable[i]; for(j=NKEYWORD-1; j>i && p->substrId==0; j--){ Keyword *pOther = &aKeywordTable[j]; if( pOther->substrId ) continue; if( pOther->len<=p->len ) continue; for(k=0; k<=pOther->len-p->len; k++){ if( memcmp(p->zName, &pOther->zName[k], p->len)==0 ){ p->substrId = pOther->id; p->substrOffset = k; break; } } } } /* Sort the table into alphabetical order */ qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare2); /* Fill in the offset for all entries */ nChar = 0; for(i=0; i<NKEYWORD; i++){ Keyword *p = &aKeywordTable[i]; if( p->offset>0 || p->substrId ) continue; p->offset = nChar; nChar += p->len; for(k=p->len-1; k>=1; k--){ for(j=i+1; j<NKEYWORD; j++){ Keyword *pOther = &aKeywordTable[j]; if( pOther->offset>0 || pOther->substrId ) continue; if( pOther->len<=k ) continue; if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){ p = pOther; p->offset = nChar - k; nChar = p->offset + p->len; p->zName += k; p->len -= k; p->prefix = k; j = i; k = p->len; } } } } for(i=0; i<NKEYWORD; i++){ Keyword *p = &aKeywordTable[i]; if( p->substrId ){ p->offset = findById(p->substrId)->offset + p->substrOffset; } } /* Sort the table by offset */ qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare3); /* Figure out how big to make the hash table in order to minimize the ** number of collisions */ bestSize = NKEYWORD; bestCount = NKEYWORD*NKEYWORD; for(i=NKEYWORD/2; i<=2*NKEYWORD; i++){ for(j=0; j<i; j++) aHash[j] = 0; for(j=0; j<NKEYWORD; j++){ |
︙ | ︙ | |||
218 219 220 221 222 223 224 | /* Begin generating code */ printf("int sqlite3KeywordCode(const char *z, int n){\n"); printf(" static const char zText[%d] =\n", nChar+1); for(i=j=0; i<NKEYWORD; i++){ Keyword *p = &aKeywordTable[i]; | | | 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 | /* Begin generating code */ printf("int sqlite3KeywordCode(const char *z, int n){\n"); printf(" static const char zText[%d] =\n", nChar+1); for(i=j=0; i<NKEYWORD; i++){ Keyword *p = &aKeywordTable[i]; if( p->substrId ) continue; if( j==0 ) printf(" \""); printf("%s", p->zName); j += p->len; if( j>60 ){ printf("\"\n"); j = 0; } |
︙ | ︙ | |||
256 257 258 259 260 261 262 | } } printf("%s };\n", j==0 ? "" : "\n"); printf(" static const unsigned char aLen[%d] = {\n", NKEYWORD); for(i=j=0; i<NKEYWORD; i++){ if( j==0 ) printf(" "); | | | 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 | } } printf("%s };\n", j==0 ? "" : "\n"); printf(" static const unsigned char aLen[%d] = {\n", NKEYWORD); for(i=j=0; i<NKEYWORD; i++){ if( j==0 ) printf(" "); printf(" %3d,", aKeywordTable[i].len+aKeywordTable[i].prefix); j++; if( j>12 ){ printf("\n"); j = 0; } } printf("%s };\n", j==0 ? "" : "\n"); |
︙ | ︙ |