Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Performance enhancement to the tokenizer. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | parser-enhancements |
Files: | files | file ages | folders |
SHA1: |
6ea2df86c95c226052f528424e9bee36 |
User & Date: | drh 2015-11-10 03:30:51.926 |
Context
2015-11-10
| ||
12:31 | Fix harmless compiler warnings in FTS5. (Closed-Leaf check-in: 09752e51a1 user: drh tags: parser-enhancements) | |
03:30 | Performance enhancement to the tokenizer. (check-in: 6ea2df86c9 user: drh tags: parser-enhancements) | |
00:02 | Remove an unused non-terminal from the grammar. (check-in: 3c37c52288 user: drh tags: parser-enhancements) | |
Changes
Changes to src/tokenize.c.
︙ | ︙ | |||
365 366 367 368 369 370 371 | } #endif default: { if( !IdChar(*z) ){ break; } for(i=1; IdChar(z[i]); i++){} | | | | 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 | } #endif default: { if( !IdChar(*z) ){ break; } for(i=1; IdChar(z[i]); i++){} *tokenType = TK_ID; return keywordCode((char*)z, i, tokenType); } } *tokenType = TK_ILLEGAL; return 1; } /* |
︙ | ︙ |
Changes to tool/mkkeywordhash.c.
︙ | ︙ | |||
273 274 275 276 277 278 279 | { "WHEN", "TK_WHEN", ALWAYS }, { "WHERE", "TK_WHERE", ALWAYS }, }; /* Number of keywords */ static int nKeyword = (sizeof(aKeywordTable)/sizeof(aKeywordTable[0])); | | | < < < < < < < < < < < < < < < < < < < | 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 | { "WHEN", "TK_WHEN", ALWAYS }, { "WHERE", "TK_WHERE", ALWAYS }, }; /* Number of keywords */ static int nKeyword = (sizeof(aKeywordTable)/sizeof(aKeywordTable[0])); /* Map all alphabetic characters into the same case */ #define charMap(X) (0x20|(X)) /* ** Comparison function for two Keyword records */ static int keywordCompare1(const void *a, const void *b){ const Keyword *pA = (Keyword*)a; const Keyword *pB = (Keyword*)b; |
︙ | ︙ | |||
343 344 345 346 347 348 349 | } /* ** This routine does the work. The generated code is printed on standard ** output. */ int main(int argc, char **argv){ | | | 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 | } /* ** This routine does the work. The generated code is printed on standard ** output. */ int main(int argc, char **argv){ int i, j, k, h, m; int bestSize, bestCount; int count; int nChar; int totalLen = 0; int aHash[1000]; /* 1000 is much bigger than nKeyword */ char zText[2000]; |
︙ | ︙ | |||
368 369 370 371 372 373 374 | /* Fill in the lengths of strings and hashes for all entries. */ for(i=0; i<nKeyword; i++){ Keyword *p = &aKeywordTable[i]; p->len = (int)strlen(p->zName); assert( p->len<sizeof(p->zOrigName) ); memcpy(p->zOrigName, p->zName, p->len+1); totalLen += p->len; | | | | 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 | /* Fill in the lengths of strings and hashes for all entries. */ for(i=0; i<nKeyword; i++){ Keyword *p = &aKeywordTable[i]; p->len = (int)strlen(p->zName); assert( p->len<sizeof(p->zOrigName) ); memcpy(p->zOrigName, p->zName, p->len+1); totalLen += p->len; p->hash = (charMap(p->zName[0])*4) ^ (charMap(p->zName[p->len-1])*3) ^ (p->len*1); p->id = i+1; } /* Sort the table from shortest to longest keyword */ qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare1); /* Look for short keywords embedded in longer keywords */ |
︙ | ︙ | |||
477 478 479 480 481 482 483 | aKeywordTable[i].iNext = aHash[h]; aHash[h] = i+1; } /* Begin generating code */ printf("%s", zHdr); printf("/* Hash score: %d */\n", bestCount); | | | 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 | aKeywordTable[i].iNext = aHash[h]; aHash[h] = i+1; } /* Begin generating code */ printf("%s", zHdr); printf("/* Hash score: %d */\n", bestCount); printf("static int keywordCode(const char *z, int n, int *pType){\n"); printf(" /* zText[] encodes %d bytes of keywords in %d bytes */\n", totalLen + nKeyword, nChar+1 ); for(i=j=k=0; i<nKeyword; i++){ Keyword *p = &aKeywordTable[i]; if( p->substrId ) continue; memcpy(&zText[k], p->zName, p->len); k += p->len; |
︙ | ︙ | |||
581 582 583 584 585 586 587 | printf("\n"); j = 0; } } printf("%s };\n", j==0 ? "" : "\n"); printf(" int h, i;\n"); | | | < | | | | | > > | | > > | | 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 | printf("\n"); j = 0; } } printf("%s };\n", j==0 ? "" : "\n"); printf(" int h, i;\n"); printf(" if( n>=2 ){\n"); printf(" h = ((charMap(z[0])*4) ^ (charMap(z[n-1])*3) ^ n) %% %d;\n", bestSize); printf(" for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){\n"); printf(" if( aLen[i]==n &&" " sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){\n"); for(i=0; i<nKeyword; i++){ printf(" testcase( i==%d ); /* %s */\n", i, aKeywordTable[i].zOrigName); } printf(" *pType = aCode[i];\n"); printf(" break;\n"); printf(" }\n"); printf(" }\n"); printf(" }\n"); printf(" return n;\n"); printf("}\n"); printf("int sqlite3KeywordCode(const unsigned char *z, int n){\n"); printf(" int id = TK_ID;\n"); printf(" keywordCode((char*)z, n, &id);\n"); printf(" return id;\n"); printf("}\n"); printf("#define SQLITE_N_KEYWORD %d\n", nKeyword); return 0; } |