Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Add code to get the tokenizer character-class logic working for EBCDIC. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | tokenizer-char-class |
Files: | files | file ages | folders |
SHA1: |
04f7da77c13925c1f1e287f4579bb855 |
User & Date: | drh 2016-02-08 19:15:48.295 |
Context
2016-02-08
| ||
19:36 | Changes to help the tokenizer run about 33% faster. (check-in: a050e6f096 user: drh tags: trunk) | |
19:15 | Add code to get the tokenizer character-class logic working for EBCDIC. (Closed-Leaf check-in: 04f7da77c1 user: drh tags: tokenizer-char-class) | |
03:23 | Faster keywordCode() implementation by taking advantage of the fact that the input is always pure ASCII alphabetic and underscore and that the keyword table is always upper-case. (check-in: ff406b9701 user: drh tags: tokenizer-char-class) | |
Changes
Changes to src/tokenize.c.
︙ | ︙ | |||
14 15 16 17 18 19 20 | ** This file contains C code that splits an SQL input string up into ** individual tokens and sends those tokens one-by-one over to the ** parser for analysis. */ #include "sqliteInt.h" #include <stdlib.h> | | > > > > > > > | 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | ** This file contains C code that splits an SQL input string up into ** individual tokens and sends those tokens one-by-one over to the ** parser for analysis. */ #include "sqliteInt.h" #include <stdlib.h> /* Character classes for tokenizing ** ** In the sqlite3GetToken() function, a switch() on aiClass[c] is implemented ** using a lookup table, whereas a switch() directly on c uses a binary search. ** The lookup table is much faster. To maximize speed, and to ensure that ** a lookup table is used, all of the classes need to be small integers and ** all of them need to be used within the switch. */ #define CC_X 0 /* The letter 'x' or 'X'. Start of x'01234fed' */ #define CC_KYWD 1 /* Alphabetics or '_'. Usable in a keyword */ #define CC_ID 2 /* unicode characters usable in IDs */ #define CC_DIGIT 3 /* Digits */ #define CC_DOLLAR 4 /* '$' */ #define CC_VARALPHA 5 /* '@', '#', ':'. Alphabetic SQL variables */ #define CC_VARNUM 6 /* '?'. Numeric SQL variables */ |
︙ | ︙ | |||
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 | #define CC_COMMA 23 /* ',' */ #define CC_AND 24 /* '&' */ #define CC_TILDA 25 /* '~' */ #define CC_DOT 26 /* '.' */ #define CC_ILLEGAL 27 /* Illegal character */ static const unsigned char aiClass[] = { /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */ /* 0x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 7, 7, 27, 7, 7, 27, 27, /* 1x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, /* 2x */ 7, 15, 8, 5, 4, 22, 24, 8, 17, 18, 21, 20, 23, 11, 26, 16, /* 3x */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 19, 12, 14, 13, 6, /* 4x */ 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 5x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 9, 27, 27, 27, 1, /* 6x */ 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 7x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 27, 10, 27, 25, 27, /* 8x */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 9x */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* Ax */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* Bx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* Cx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* Dx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* Ex */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* Fx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }; /* | > > > > > > > > > > > > > > > > > > > > > | | > | > | 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 | #define CC_COMMA 23 /* ',' */ #define CC_AND 24 /* '&' */ #define CC_TILDA 25 /* '~' */ #define CC_DOT 26 /* '.' */ #define CC_ILLEGAL 27 /* Illegal character */ static const unsigned char aiClass[] = { #ifdef SQLITE_ASCII /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */ /* 0x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 7, 7, 27, 7, 7, 27, 27, /* 1x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, /* 2x */ 7, 15, 8, 5, 4, 22, 24, 8, 17, 18, 21, 20, 23, 11, 26, 16, /* 3x */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 19, 12, 14, 13, 6, /* 4x */ 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 5x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 9, 27, 27, 27, 1, /* 6x */ 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 7x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 27, 10, 27, 25, 27, /* 8x */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 9x */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* Ax */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* Bx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* Cx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* Dx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* Ex */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* Fx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 #endif #ifdef SQLITE_EBCDIC /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */ /* 0x */ 27, 27, 27, 27, 27, 7, 27, 27, 27, 27, 27, 27, 7, 7, 27, 27, /* 1x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, /* 2x */ 27, 27, 27, 27, 27, 7, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, /* 3x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, /* 4x */ 7, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 12, 17, 20, 10, /* 5x */ 24, 27, 27, 27, 27, 27, 27, 27, 27, 27, 15, 4, 21, 18, 19, 27, /* 6x */ 11, 16, 27, 27, 27, 27, 27, 27, 27, 27, 27, 23, 22, 1, 13, 7, /* 7x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 8, 5, 5, 5, 8, 14, 8, /* 8x */ 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 27, 27, 27, 27, 27, /* 9x */ 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 27, 27, 27, 27, 27, /* 9x */ 25, 1, 1, 1, 1, 1, 1, 0, 1, 1, 27, 27, 27, 27, 27, 27, /* Bx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 9, 27, 27, 27, 27, 27, /* Cx */ 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 27, 27, 27, 27, 27, /* Dx */ 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 27, 27, 27, 27, 27, /* Ex */ 27, 27, 1, 1, 1, 1, 1, 0, 1, 1, 27, 27, 27, 27, 27, 27, /* Fx */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 27, 27, 27, 27, 27, 27, #endif }; /* ** The charMap() macro maps alphabetic characters (only) into their ** lower-case ASCII equivalent. On ASCII machines, this is just ** an upper-to-lower case map. On EBCDIC machines we also need ** to adjust the encoding. The mapping is only valid for alphabetics ** which are the only characters for which this feature is used. ** ** Used by keywordhash.h */ #ifdef SQLITE_ASCII # define charMap(X) sqlite3UpperToLower[(unsigned char)X] #endif #ifdef SQLITE_EBCDIC # define charMap(X) ebcdicToAscii[(unsigned char)X] const unsigned char ebcdicToAscii[] = { |
︙ | ︙ | |||
406 407 408 409 410 411 412 | } i = 1; break; } #endif case CC_KYWD: { for(i=1; aiClass[z[i]]<=CC_KYWD; i++){} | | | | 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 | } i = 1; break; } #endif case CC_KYWD: { for(i=1; aiClass[z[i]]<=CC_KYWD; i++){} if( IdChar(z[i]) ){ i++; break; } *tokenType = TK_ID; return keywordCode((char*)z, i, tokenType); } case CC_ID: { i = 1; break; } default: { *tokenType = TK_ILLEGAL; return 1; } } while( IdChar(z[i]) ){ i++; } *tokenType = TK_ID; return i; } /* ** Run the parser on the given SQL string. The parser structure is ** passed in. An SQLITE_ status code is returned. If an error occurs |
︙ | ︙ |
Changes to tool/mkkeywordhash.c.
︙ | ︙ | |||
273 274 275 276 277 278 279 | { "WHEN", "TK_WHEN", ALWAYS }, { "WHERE", "TK_WHERE", ALWAYS }, }; /* Number of keywords */ static int nKeyword = (sizeof(aKeywordTable)/sizeof(aKeywordTable[0])); | | > > > | 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 | { "WHEN", "TK_WHEN", ALWAYS }, { "WHERE", "TK_WHERE", ALWAYS }, }; /* Number of keywords */ static int nKeyword = (sizeof(aKeywordTable)/sizeof(aKeywordTable[0])); /* Map all alphabetic characters into lower-case for hashing. This is ** only valid for alphabetics. In particular it does not work for '_' ** and so the hash cannot be on a keyword position that might be an '_'. */ #define charMap(X) (0x20|(X)) /* ** Comparision function for two Keyword records */ static int keywordCompare1(const void *a, const void *b){ const Keyword *pA = (Keyword*)a; |
︙ | ︙ | |||
561 562 563 564 565 566 567 | if( j>=5 ){ printf("\n"); j = 0; } } printf("%s };\n", j==0 ? "" : "\n"); | | | | > > > > > | 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 | if( j>=5 ){ printf("\n"); j = 0; } } printf("%s };\n", j==0 ? "" : "\n"); printf(" int i, j;\n"); printf(" const char *zKW;\n"); printf(" if( n>=2 ){\n"); printf(" i = ((charMap(z[0])*4) ^ (charMap(z[n-1])*3) ^ n) %% %d;\n", bestSize); printf(" for(i=((int)aHash[i])-1; i>=0; i=((int)aNext[i])-1){\n"); printf(" if( aLen[i]!=n ) continue;\n"); printf(" j = 0;\n"); printf(" zKW = &zText[aOffset[i]];\n"); printf("#ifdef SQLITE_ASCII\n"); printf(" while( j<n && (z[j]&~0x20)==zKW[j] ){ j++; }\n"); printf("#endif\n"); printf("#ifdef SQLITE_EBCDIC\n"); printf(" while( j<n && toupper(z[j])==zKW[j] ){ j++; }\n"); printf("#endif\n"); printf(" if( j<n ) continue;\n"); for(i=0; i<nKeyword; i++){ printf(" testcase( i==%d ); /* %s */\n", i, aKeywordTable[i].zOrigName); } printf(" *pType = aCode[i];\n"); printf(" break;\n"); |
︙ | ︙ |