Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Tighter compression of the keyword hash table. (CVS 3920) |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: | 681216767d7fabfccb0b12f6a81b18b6 |
User & Date: | drh 2007-05-04 17:07:53 |
Context
2007-05-04
| ||
18:30 | Change incremental vacuum to be triggered by a pragma rather than a command. We have a lot to learn about this yet and we do not want to paint ourselves into a corner by commiting to specific syntax too early. (CVS 3921) check-in: b13e497a user: drh tags: trunk | |
17:07 | Tighter compression of the keyword hash table. (CVS 3920) check-in: 68121676 user: drh tags: trunk | |
16:14 | Optional parameter in the INCREMENTAL VACUUM statement specifies how many pages to vacuum from the database. (CVS 3919) check-in: ed713f9c user: drh tags: trunk | |
Changes
Changes to tool/mkkeywordhash.c.
11 11 ** A header comment placed at the beginning of generated code. 12 12 */ 13 13 static const char zHdr[] = 14 14 "/***** This file contains automatically generated code ******\n" 15 15 "**\n" 16 16 "** The code in this file has been automatically generated by\n" 17 17 "**\n" 18 - "** $Header: /home/drh/sqlite/trans/cvs/sqlite/sqlite/tool/mkkeywordhash.c,v 1.28 2007/04/26 14:42:36 danielk1977 Exp $\n" 18 + "** $Header: /home/drh/sqlite/trans/cvs/sqlite/sqlite/tool/mkkeywordhash.c,v 1.29 2007/05/04 17:07:53 drh Exp $\n" 19 19 "**\n" 20 20 "** The code in this file implements a function that determines whether\n" 21 21 "** or not a given identifier is really an SQL keyword. The same thing\n" 22 22 "** might be implemented more directly using a hand-written hash table.\n" 23 23 "** But by using this automatically generated code, the size of the code\n" 24 24 "** is substantially reduced. This is important for embedded applications\n" 25 25 "** on platforms with limited memory.\n" ................................................................................ 36 36 char *zTokenType; /* Token value for this keyword */ 37 37 int mask; /* Code this keyword if non-zero */ 38 38 int id; /* Unique ID for this record */ 39 39 int hash; /* Hash on the keyword */ 40 40 int offset; /* Offset to start of name string */ 41 41 int len; /* Length of this keyword, not counting final \000 */ 42 42 int prefix; /* Number of characters in prefix */ 43 + int longestSuffix; /* Longest suffix that is a prefix on another word */ 43 44 int iNext; /* Index in aKeywordTable[] of next with same hash */ 44 45 int substrId; /* Id to another keyword this keyword is embedded in */ 45 46 int substrOffset; /* Offset into substrId for start of this keyword */ 46 47 }; 47 48 48 49 /* 49 50 ** Define masks used to determine which keywords are allowed ................................................................................ 256 257 { "VIEW", "TK_VIEW", VIEW }, 257 258 { "VIRTUAL", "TK_VIRTUAL", VTAB }, 258 259 { "WHEN", "TK_WHEN", ALWAYS }, 259 260 { "WHERE", "TK_WHERE", ALWAYS }, 260 261 }; 261 262 262 263 /* Number of keywords */ 263 -static int NKEYWORD = (sizeof(aKeywordTable)/sizeof(aKeywordTable[0])); 264 +static int nKeyword = (sizeof(aKeywordTable)/sizeof(aKeywordTable[0])); 264 265 265 266 /* An array to map all upper-case characters into their corresponding 266 267 ** lower-case character. 267 268 */ 268 269 const unsigned char sqlite3UpperToLower[] = { 269 270 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 270 271 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, ................................................................................ 295 296 n = strcmp(pA->zName, pB->zName); 296 297 } 297 298 return n; 298 299 } 299 300 static int keywordCompare2(const void *a, const void *b){ 300 301 const Keyword *pA = (Keyword*)a; 301 302 const Keyword *pB = (Keyword*)b; 302 - int n = strcmp(pA->zName, pB->zName); 303 + int n = pB->longestSuffix - pA->longestSuffix; 304 + if( n==0 ){ 305 + n = strcmp(pA->zName, pB->zName); 306 + } 303 307 return n; 304 308 } 305 309 static int keywordCompare3(const void *a, const void *b){ 306 310 const Keyword *pA = (Keyword*)a; 307 311 const Keyword *pB = (Keyword*)b; 308 312 int n = pA->offset - pB->offset; 309 313 return n; ................................................................................ 310 314 } 311 315 312 316 /* 313 317 ** Return a KeywordTable entry with the given id 314 318 */ 315 319 static Keyword *findById(int id){ 316 320 int i; 317 - for(i=0; i<NKEYWORD; i++){ 321 + for(i=0; i<nKeyword; i++){ 318 322 if( aKeywordTable[i].id==id ) break; 319 323 } 320 324 return &aKeywordTable[i]; 321 325 } 322 326 323 327 /* 324 328 ** This routine does the work. The generated code is printed on standard ................................................................................ 325 329 ** output. 326 330 */ 327 331 int main(int argc, char **argv){ 328 332 int i, j, k, h; 329 333 int bestSize, bestCount; 330 334 int count; 331 335 int nChar; 332 - int aHash[1000]; /* 1000 is much bigger than NKEYWORD */ 336 + int totalLen = 0; 337 + int aHash[1000]; /* 1000 is much bigger than nKeyword */ 333 338 334 339 /* Remove entries from the list of keywords that have mask==0 */ 335 - for(i=j=0; i<NKEYWORD; i++){ 340 + for(i=j=0; i<nKeyword; i++){ 336 341 if( aKeywordTable[i].mask==0 ) continue; 337 342 if( j<i ){ 338 343 aKeywordTable[j] = aKeywordTable[i]; 339 344 } 340 345 j++; 341 346 } 342 - NKEYWORD = j; 347 + nKeyword = j; 343 348 344 349 /* Fill in the lengths of strings and hashes for all entries. */ 345 - for(i=0; i<NKEYWORD; i++){ 350 + for(i=0; i<nKeyword; i++){ 346 351 Keyword *p = &aKeywordTable[i]; 347 352 p->len = strlen(p->zName); 353 + totalLen += p->len; 348 354 p->hash = (UpperToLower[p->zName[0]]*4) ^ 349 355 (UpperToLower[p->zName[p->len-1]]*3) ^ p->len; 350 356 p->id = i+1; 351 357 } 352 358 353 359 /* Sort the table from shortest to longest keyword */ 354 - qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare1); 360 + qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare1); 355 361 356 362 /* Look for short keywords embedded in longer keywords */ 357 - for(i=NKEYWORD-2; i>=0; i--){ 363 + for(i=nKeyword-2; i>=0; i--){ 358 364 Keyword *p = &aKeywordTable[i]; 359 - for(j=NKEYWORD-1; j>i && p->substrId==0; j--){ 365 + for(j=nKeyword-1; j>i && p->substrId==0; j--){ 360 366 Keyword *pOther = &aKeywordTable[j]; 361 367 if( pOther->substrId ) continue; 362 368 if( pOther->len<=p->len ) continue; 363 369 for(k=0; k<=pOther->len-p->len; k++){ 364 370 if( memcmp(p->zName, &pOther->zName[k], p->len)==0 ){ 365 371 p->substrId = pOther->id; 366 372 p->substrOffset = k; 367 373 break; 368 374 } 369 375 } 370 376 } 371 377 } 372 378 373 - /* Sort the table into alphabetical order */ 374 - qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare2); 379 + /* Compute the longestSuffix value for every word */ 380 + for(i=0; i<nKeyword; i++){ 381 + Keyword *p = &aKeywordTable[i]; 382 + if( p->substrId ) continue; 383 + for(j=0; j<nKeyword; j++){ 384 + Keyword *pOther; 385 + if( j==i ) continue; 386 + pOther = &aKeywordTable[j]; 387 + if( pOther->substrId ) continue; 388 + for(k=p->longestSuffix+1; k<p->len && k<pOther->len; k++){ 389 + if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){ 390 + p->longestSuffix = k; 391 + } 392 + } 393 + } 394 + } 395 + 396 + /* Sort the table into reverse order by length */ 397 + qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare2); 375 398 376 399 /* Fill in the offset for all entries */ 377 400 nChar = 0; 378 - for(i=0; i<NKEYWORD; i++){ 401 + for(i=0; i<nKeyword; i++){ 379 402 Keyword *p = &aKeywordTable[i]; 380 403 if( p->offset>0 || p->substrId ) continue; 381 404 p->offset = nChar; 382 405 nChar += p->len; 383 406 for(k=p->len-1; k>=1; k--){ 384 - for(j=i+1; j<NKEYWORD; j++){ 407 + for(j=i+1; j<nKeyword; j++){ 385 408 Keyword *pOther = &aKeywordTable[j]; 386 409 if( pOther->offset>0 || pOther->substrId ) continue; 387 410 if( pOther->len<=k ) continue; 388 411 if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){ 389 412 p = pOther; 390 413 p->offset = nChar - k; 391 414 nChar = p->offset + p->len; ................................................................................ 394 417 p->prefix = k; 395 418 j = i; 396 419 k = p->len; 397 420 } 398 421 } 399 422 } 400 423 } 401 - for(i=0; i<NKEYWORD; i++){ 424 + for(i=0; i<nKeyword; i++){ 402 425 Keyword *p = &aKeywordTable[i]; 403 426 if( p->substrId ){ 404 427 p->offset = findById(p->substrId)->offset + p->substrOffset; 405 428 } 406 429 } 407 430 408 431 /* Sort the table by offset */ 409 - qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare3); 432 + qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare3); 410 433 411 434 /* Figure out how big to make the hash table in order to minimize the 412 435 ** number of collisions */ 413 - bestSize = NKEYWORD; 414 - bestCount = NKEYWORD*NKEYWORD; 415 - for(i=NKEYWORD/2; i<=2*NKEYWORD; i++){ 436 + bestSize = nKeyword; 437 + bestCount = nKeyword*nKeyword; 438 + for(i=nKeyword/2; i<=2*nKeyword; i++){ 416 439 for(j=0; j<i; j++) aHash[j] = 0; 417 - for(j=0; j<NKEYWORD; j++){ 440 + for(j=0; j<nKeyword; j++){ 418 441 h = aKeywordTable[j].hash % i; 419 442 aHash[h] *= 2; 420 443 aHash[h]++; 421 444 } 422 445 for(j=count=0; j<i; j++) count += aHash[j]; 423 446 if( count<bestCount ){ 424 447 bestCount = count; 425 448 bestSize = i; 426 449 } 427 450 } 428 451 429 452 /* Compute the hash */ 430 453 for(i=0; i<bestSize; i++) aHash[i] = 0; 431 - for(i=0; i<NKEYWORD; i++){ 454 + for(i=0; i<nKeyword; i++){ 432 455 h = aKeywordTable[i].hash % bestSize; 433 456 aKeywordTable[i].iNext = aHash[h]; 434 457 aHash[h] = i+1; 435 458 } 436 459 437 460 /* Begin generating code */ 438 461 printf("%s", zHdr); 439 462 printf("/* Hash score: %d */\n", bestCount); 440 463 printf("static int keywordCode(const char *z, int n){\n"); 464 + printf(" /* zText[] encodes %d bytes of keywords in %d bytes */\n", 465 + totalLen + nKeyword, nChar+1 ); 441 466 442 467 printf(" static const char zText[%d] =\n", nChar+1); 443 - for(i=j=0; i<NKEYWORD; i++){ 468 + for(i=j=0; i<nKeyword; i++){ 444 469 Keyword *p = &aKeywordTable[i]; 445 470 if( p->substrId ) continue; 446 471 if( j==0 ) printf(" \""); 447 472 printf("%s", p->zName); 448 473 j += p->len; 449 474 if( j>60 ){ 450 475 printf("\"\n"); ................................................................................ 461 486 if( j>12 ){ 462 487 printf("\n"); 463 488 j = 0; 464 489 } 465 490 } 466 491 printf("%s };\n", j==0 ? "" : "\n"); 467 492 468 - printf(" static const unsigned char aNext[%d] = {\n", NKEYWORD); 469 - for(i=j=0; i<NKEYWORD; i++){ 493 + printf(" static const unsigned char aNext[%d] = {\n", nKeyword); 494 + for(i=j=0; i<nKeyword; i++){ 470 495 if( j==0 ) printf(" "); 471 496 printf(" %3d,", aKeywordTable[i].iNext); 472 497 j++; 473 498 if( j>12 ){ 474 499 printf("\n"); 475 500 j = 0; 476 501 } 477 502 } 478 503 printf("%s };\n", j==0 ? "" : "\n"); 479 504 480 - printf(" static const unsigned char aLen[%d] = {\n", NKEYWORD); 481 - for(i=j=0; i<NKEYWORD; i++){ 505 + printf(" static const unsigned char aLen[%d] = {\n", nKeyword); 506 + for(i=j=0; i<nKeyword; i++){ 482 507 if( j==0 ) printf(" "); 483 508 printf(" %3d,", aKeywordTable[i].len+aKeywordTable[i].prefix); 484 509 j++; 485 510 if( j>12 ){ 486 511 printf("\n"); 487 512 j = 0; 488 513 } 489 514 } 490 515 printf("%s };\n", j==0 ? "" : "\n"); 491 516 492 - printf(" static const unsigned short int aOffset[%d] = {\n", NKEYWORD); 493 - for(i=j=0; i<NKEYWORD; i++){ 517 + printf(" static const unsigned short int aOffset[%d] = {\n", nKeyword); 518 + for(i=j=0; i<nKeyword; i++){ 494 519 if( j==0 ) printf(" "); 495 520 printf(" %3d,", aKeywordTable[i].offset); 496 521 j++; 497 522 if( j>12 ){ 498 523 printf("\n"); 499 524 j = 0; 500 525 } 501 526 } 502 527 printf("%s };\n", j==0 ? "" : "\n"); 503 528 504 - printf(" static const unsigned char aCode[%d] = {\n", NKEYWORD); 505 - for(i=j=0; i<NKEYWORD; i++){ 529 + printf(" static const unsigned char aCode[%d] = {\n", nKeyword); 530 + for(i=j=0; i<nKeyword; i++){ 506 531 char *zToken = aKeywordTable[i].zTokenType; 507 532 if( j==0 ) printf(" "); 508 533 printf("%s,%*s", zToken, (int)(14-strlen(zToken)), ""); 509 534 j++; 510 535 if( j>=5 ){ 511 536 printf("\n"); 512 537 j = 0;