Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Enhance LEMON so that it generates the action table in such a way that no range check is needed on the lookahead table to verify that the next input token is valid. This makes the lookahead table slightly larger (about 120 bytes) but helps the parser to run faster. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | lemon-improvements |
Files: | files | file ages | folders |
SHA3-256: |
7eb0198d0102e97e4b7ad9e359d95985 |
User & Date: | drh 2017-12-25 04:15:38.668 |
Context
2017-12-26
| ||
18:04 | Add support for measuring and reporting coverage of the parser state machine using the SQLITE_TESTCTRL_PARSER_COVERAGE test-control. (check-in: 1253a872db user: drh tags: lemon-improvements) | |
2017-12-25
| ||
04:15 | Enhance LEMON so that it generates the action table in such a way that no range check is needed on the lookahead table to verify that the next input token is valid. This makes the lookahead table slightly larger (about 120 bytes) but helps the parser to run faster. (check-in: 7eb0198d01 user: drh tags: lemon-improvements) | |
00:10 | In the LEMON-generated parser, avoid unnecessary tests for the acceptance state. (check-in: fdbb35c54f user: drh tags: lemon-improvements) | |
Changes
Changes to tool/lemon.c.
︙ | ︙ | |||
409 410 411 412 413 414 415 416 417 418 419 420 421 422 | char *tokendest; /* Code to execute to destroy token data */ char *vardest; /* Code for the default non-terminal destructor */ char *filename; /* Name of the input file */ char *outname; /* Name of the current output file */ char *tokenprefix; /* A prefix added to token names in the .h file */ int nconflict; /* Number of parsing conflicts */ int nactiontab; /* Number of entries in the yy_action[] table */ int tablesize; /* Total table size of all tables in bytes */ int basisflag; /* Print only basis configurations */ int has_fallback; /* True if any %fallback is seen in the grammar */ int nolinenosflag; /* True if #line statements should not be printed */ char *argv0; /* Name of the program */ }; | > | 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 | char *tokendest; /* Code to execute to destroy token data */ char *vardest; /* Code for the default non-terminal destructor */ char *filename; /* Name of the input file */ char *outname; /* Name of the current output file */ char *tokenprefix; /* A prefix added to token names in the .h file */ int nconflict; /* Number of parsing conflicts */ int nactiontab; /* Number of entries in the yy_action[] table */ int nlookaheadtab; /* Number of entries in yy_lookahead[] */ int tablesize; /* Total table size of all tables in bytes */ int basisflag; /* Print only basis configurations */ int has_fallback; /* True if any %fallback is seen in the grammar */ int nolinenosflag; /* True if #line statements should not be printed */ char *argv0; /* Name of the program */ }; |
︙ | ︙ | |||
585 586 587 588 589 590 591 592 593 594 | *aAction, /* The yy_action[] table under construction */ *aLookahead; /* A single new transaction set */ int mnLookahead; /* Minimum aLookahead[].lookahead */ int mnAction; /* Action associated with mnLookahead */ int mxLookahead; /* Maximum aLookahead[].lookahead */ int nLookahead; /* Used slots in aLookahead[] */ int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ }; /* Return the number of entries in the yy_action table */ | > > | | > > | 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 | *aAction, /* The yy_action[] table under construction */ *aLookahead; /* A single new transaction set */ int mnLookahead; /* Minimum aLookahead[].lookahead */ int mnAction; /* Action associated with mnLookahead */ int mxLookahead; /* Maximum aLookahead[].lookahead */ int nLookahead; /* Used slots in aLookahead[] */ int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ int nterminal; /* Number of terminal symbols */ int nsymbol; /* total number of symbols */ }; /* Return the number of entries in the yy_action table */ #define acttab_lookahead_size(X) ((X)->nAction) /* The value for the N-th entry in yy_action */ #define acttab_yyaction(X,N) ((X)->aAction[N].action) /* The value for the N-th entry in yy_lookahead */ #define acttab_yylookahead(X,N) ((X)->aAction[N].lookahead) /* Free all memory associated with the given acttab */ void acttab_free(acttab *p){ free( p->aAction ); free( p->aLookahead ); free( p ); } /* Allocate a new acttab structure */ acttab *acttab_alloc(int nsymbol, int nterminal){ acttab *p = (acttab *) calloc( 1, sizeof(*p) ); if( p==0 ){ fprintf(stderr,"Unable to allocate memory for a new acttab."); exit(1); } memset(p, 0, sizeof(*p)); p->nsymbol = nsymbol; p->nterminal = nterminal; return p; } /* Add a new action to the current transaction set. ** ** This routine is called once for each lookahead for a particular ** state. |
︙ | ︙ | |||
651 652 653 654 655 656 657 658 | /* ** Add the transaction set built up with prior calls to acttab_action() ** into the current action table. Then reset the transaction set back ** to an empty set in preparation for a new round of acttab_action() calls. ** ** Return the offset into the action table of the new transaction. */ | > > > > > > > > | | | | 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 | /* ** Add the transaction set built up with prior calls to acttab_action() ** into the current action table. Then reset the transaction set back ** to an empty set in preparation for a new round of acttab_action() calls. ** ** Return the offset into the action table of the new transaction. ** ** If the makeItSafe parameter is true, then the offset is chosen so that ** it is impossible to overread the yy_lookaside[] table regardless of ** the lookaside token. This is done for the terminal symbols, as they ** come from external inputs and can contain syntax errors. When makeItSafe ** is false, there is more flexibility in selecting offsets, resulting in ** a smaller table. For non-terminal symbols, which are never syntax errors, ** makeItSafe can be false. */ int acttab_insert(acttab *p, int makeItSafe){ int i, j, k, n, end; assert( p->nLookahead>0 ); /* Make sure we have enough space to hold the expanded action table ** in the worst case. The worst case occurs if the transaction set ** must be appended to the current action table */ n = p->nsymbol + 1; if( p->nAction + n >= p->nActionAlloc ){ int oldAlloc = p->nActionAlloc; p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; p->aAction = (struct lookahead_action *) realloc( p->aAction, sizeof(p->aAction[0])*p->nActionAlloc); if( p->aAction==0 ){ fprintf(stderr,"malloc failed\n"); |
︙ | ︙ | |||
682 683 684 685 686 687 688 | /* Scan the existing action table looking for an offset that is a ** duplicate of the current transaction set. Fall out of the loop ** if and when the duplicate is found. ** ** i is the index in p->aAction[] where p->mnLookahead is inserted. */ | > | | 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 | /* Scan the existing action table looking for an offset that is a ** duplicate of the current transaction set. Fall out of the loop ** if and when the duplicate is found. ** ** i is the index in p->aAction[] where p->mnLookahead is inserted. */ end = makeItSafe ? p->mnLookahead : 0; for(i=p->nAction-1; i>=end; i--){ if( p->aAction[i].lookahead==p->mnLookahead ){ /* All lookaheads and actions in the aLookahead[] transaction ** must match against the candidate aAction[i] entry. */ if( p->aAction[i].action!=p->mnAction ) continue; for(j=0; j<p->nLookahead; j++){ k = p->aLookahead[j].lookahead - p->mnLookahead + i; if( k<0 || k>=p->nAction ) break; |
︙ | ︙ | |||
712 713 714 715 716 717 718 | } } /* If no existing offsets exactly match the current transaction, find an ** an empty offset in the aAction[] table in which we can add the ** aLookahead[] transaction. */ | | > | > > > > > > > > > > > > > > > > > > | 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 | } } /* If no existing offsets exactly match the current transaction, find an ** an empty offset in the aAction[] table in which we can add the ** aLookahead[] transaction. */ if( i<end ){ /* Look for holes in the aAction[] table that fit the current ** aLookahead[] transaction. Leave i set to the offset of the hole. ** If no holes are found, i is left at p->nAction, which means the ** transaction will be appended. */ i = makeItSafe ? p->mnLookahead : 0; for(; i<p->nActionAlloc - p->mxLookahead; i++){ if( p->aAction[i].lookahead<0 ){ for(j=0; j<p->nLookahead; j++){ k = p->aLookahead[j].lookahead - p->mnLookahead + i; if( k<0 ) break; if( p->aAction[k].lookahead>=0 ) break; } if( j<p->nLookahead ) continue; for(j=0; j<p->nAction; j++){ if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break; } if( j==p->nAction ){ break; /* Fits in empty slots */ } } } } /* Insert transaction set at index i. */ #if 0 printf("Acttab:"); for(j=0; j<p->nLookahead; j++){ printf(" %d", p->aLookahead[j].lookahead); } printf(" inserted at %d\n", i); #endif for(j=0; j<p->nLookahead; j++){ k = p->aLookahead[j].lookahead - p->mnLookahead + i; p->aAction[k] = p->aLookahead[j]; if( k>=p->nAction ) p->nAction = k+1; } if( makeItSafe && i+p->nterminal>p->nAction ) p->nAction = i+p->nterminal; p->nLookahead = 0; /* Return the offset that is added to the lookahead in order to get the ** index into yy_action of the action */ return i - p->mnLookahead; } /* ** Return the size of the action table without the trailing syntax error ** entries. */ int acttab_action_size(acttab *p){ int n = p->nAction; while( n>0 && p->aAction[n-1].lookahead<0 ){ n--; } return n; } /********************** From the file "build.c" *****************************/ /* ** Routines to construction the finite state machine for the LEMON ** parser generator. */ |
︙ | ︙ | |||
1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 | stats_line("terminal symbols", lem.nterminal); stats_line("non-terminal symbols", lem.nsymbol - lem.nterminal); stats_line("total symbols", lem.nsymbol); stats_line("rules", lem.nrule); stats_line("states", lem.nxstate); stats_line("conflicts", lem.nconflict); stats_line("action table entries", lem.nactiontab); stats_line("total table size (bytes)", lem.tablesize); } if( lem.nconflict > 0 ){ fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict); } /* return 0 on success, 1 on failure. */ | > | 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 | stats_line("terminal symbols", lem.nterminal); stats_line("non-terminal symbols", lem.nsymbol - lem.nterminal); stats_line("total symbols", lem.nsymbol); stats_line("rules", lem.nrule); stats_line("states", lem.nxstate); stats_line("conflicts", lem.nconflict); stats_line("action table entries", lem.nactiontab); stats_line("lookahead table entries", lem.nlookaheadtab); stats_line("total table size (bytes)", lem.tablesize); } if( lem.nconflict > 0 ){ fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict); } /* return 0 on success, 1 on failure. */ |
︙ | ︙ | |||
4163 4164 4165 4166 4167 4168 4169 | } mxTknOfst = mnTknOfst = 0; mxNtOfst = mnNtOfst = 0; /* In an effort to minimize the action table size, use the heuristic ** of placing the largest action sets first */ for(i=0; i<lemp->nxstate*2; i++) ax[i].iOrder = i; qsort(ax, lemp->nxstate*2, sizeof(ax[0]), axset_compare); | | | | | 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 | } mxTknOfst = mnTknOfst = 0; mxNtOfst = mnNtOfst = 0; /* In an effort to minimize the action table size, use the heuristic ** of placing the largest action sets first */ for(i=0; i<lemp->nxstate*2; i++) ax[i].iOrder = i; qsort(ax, lemp->nxstate*2, sizeof(ax[0]), axset_compare); pActtab = acttab_alloc(lemp->nsymbol, lemp->nterminal); for(i=0; i<lemp->nxstate*2 && ax[i].nAction>0; i++){ stp = ax[i].stp; if( ax[i].isTkn ){ for(ap=stp->ap; ap; ap=ap->next){ int action; if( ap->sp->index>=lemp->nterminal ) continue; action = compute_action(lemp, ap); if( action<0 ) continue; acttab_action(pActtab, ap->sp->index, action); } stp->iTknOfst = acttab_insert(pActtab, 1); if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst; if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst; }else{ for(ap=stp->ap; ap; ap=ap->next){ int action; if( ap->sp->index<lemp->nterminal ) continue; if( ap->sp->index==lemp->nsymbol ) continue; action = compute_action(lemp, ap); if( action<0 ) continue; acttab_action(pActtab, ap->sp->index, action); } stp->iNtOfst = acttab_insert(pActtab, 0); if( stp->iNtOfst<mnNtOfst ) mnNtOfst = stp->iNtOfst; if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst; } #if 0 /* Uncomment for a trace of how the yy_action[] table fills out */ { int jj, nn; for(jj=nn=0; jj<pActtab->nAction; jj++){ if( pActtab->aAction[jj].action<0 ) nn++; |
︙ | ︙ | |||
4245 4246 4247 4248 4249 4250 4251 | ** shifting terminals. ** yy_reduce_ofst[] For each state, the offset into yy_action for ** shifting non-terminals after a reduce. ** yy_default[] Default action for each state. */ /* Output the yy_action table */ | | > < | 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 | ** shifting terminals. ** yy_reduce_ofst[] For each state, the offset into yy_action for ** shifting non-terminals after a reduce. ** yy_default[] Default action for each state. */ /* Output the yy_action table */ lemp->nactiontab = n = acttab_action_size(pActtab); lemp->tablesize += n*szActionType; fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++; fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++; for(i=j=0; i<n; i++){ int action = acttab_yyaction(pActtab, i); if( action<0 ) action = lemp->noAction; if( j==0 ) fprintf(out," /* %5d */ ", i); fprintf(out, " %4d,", action); if( j==9 || i==n-1 ){ fprintf(out, "\n"); lineno++; j = 0; }else{ j++; } } fprintf(out, "};\n"); lineno++; /* Output the yy_lookahead table */ lemp->nlookaheadtab = n = acttab_lookahead_size(pActtab); lemp->tablesize += n*szCodeType; fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++; for(i=j=0; i<n; i++){ int la = acttab_yylookahead(pActtab, i); if( la<0 ) la = lemp->nsymbol; if( j==0 ) fprintf(out," /* %5d */ ", i); fprintf(out, " %4d,", la); if( j==9 || i==n-1 ){ fprintf(out, "\n"); lineno++; j = 0; }else{ j++; } } fprintf(out, "};\n"); lineno++; /* Output the yy_shift_ofst[] table */ n = lemp->nxstate; while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--; fprintf(out, "#define YY_SHIFT_COUNT (%d)\n", n-1); lineno++; fprintf(out, "#define YY_SHIFT_MIN (%d)\n", mnTknOfst); lineno++; fprintf(out, "#define YY_SHIFT_MAX (%d)\n", mxTknOfst); lineno++; fprintf(out, "static const %s yy_shift_ofst[] = {\n", minimum_size_type(mnTknOfst, lemp->nterminal+lemp->nactiontab, &sz)); lineno++; lemp->tablesize += n*sz; |
︙ | ︙ | |||
4308 4309 4310 4311 4312 4313 4314 | }else{ j++; } } fprintf(out, "};\n"); lineno++; /* Output the yy_reduce_ofst[] table */ | < | 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 | }else{ j++; } } fprintf(out, "};\n"); lineno++; /* Output the yy_reduce_ofst[] table */ n = lemp->nxstate; while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--; fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n-1); lineno++; fprintf(out, "#define YY_REDUCE_MIN (%d)\n", mnNtOfst); lineno++; fprintf(out, "#define YY_REDUCE_MAX (%d)\n", mxNtOfst); lineno++; fprintf(out, "static const %s yy_reduce_ofst[] = {\n", minimum_size_type(mnNtOfst-1, mxNtOfst, &sz)); lineno++; |
︙ | ︙ | |||
4378 4379 4380 4381 4382 4383 4384 | } tplt_xfer(lemp->name, in, out, &lineno); /* Generate a table containing the symbolic name of every symbol */ for(i=0; i<lemp->nsymbol; i++){ lemon_sprintf(line,"\"%s\",",lemp->symbols[i]->name); | | < < | 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 | } tplt_xfer(lemp->name, in, out, &lineno); /* Generate a table containing the symbolic name of every symbol */ for(i=0; i<lemp->nsymbol; i++){ lemon_sprintf(line,"\"%s\",",lemp->symbols[i]->name); fprintf(out," /* %4d */ \"%s\",\n",i, lemp->symbols[i]->name); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); /* Generate a table containing a text string that describes every ** rule in the rule set of the grammar. This information is used ** when tracing REDUCE actions. */ for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ |
︙ | ︙ |
Changes to tool/lempar.c.
︙ | ︙ | |||
127 128 129 130 131 132 133 | ** ** The action table is constructed as a single large table named yy_action[]. ** Given state S and lookahead X, the action is computed as either: ** ** (A) N = yy_action[ yy_shift_ofst[S] + X ] ** (B) N = yy_default[S] ** | | < | < < < < | < | 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 | ** ** The action table is constructed as a single large table named yy_action[]. ** Given state S and lookahead X, the action is computed as either: ** ** (A) N = yy_action[ yy_shift_ofst[S] + X ] ** (B) N = yy_default[S] ** ** The (A) formula is preferred. The B formula is used instead if ** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X. ** ** The formulas above are for computing the action when the lookahead is ** a terminal symbol. If the lookahead is a non-terminal (as occurs after ** a reduce action) then the yy_reduce_ofst[] array is used in place of ** the yy_shift_ofst[] array. ** ** The following are the tables generated in this section: ** ** yy_action[] A single table containing all actions. ** yy_lookahead[] A table containing the lookahead for each entry in ** yy_action. Used to detect hash collisions. ** yy_shift_ofst[] For each state, the offset into yy_action for |
︙ | ︙ | |||
474 475 476 477 478 479 480 | if( stateno>YY_MAX_SHIFT ) return stateno; assert( stateno <= YY_SHIFT_COUNT ); do{ i = yy_shift_ofst[stateno]; assert( iLookAhead!=YYNOCODE ); i += iLookAhead; | > | | 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 | if( stateno>YY_MAX_SHIFT ) return stateno; assert( stateno <= YY_SHIFT_COUNT ); do{ i = yy_shift_ofst[stateno]; assert( iLookAhead!=YYNOCODE ); i += iLookAhead; assert( i>=0 && i<sizeof(yy_lookahead)/sizeof(yy_lookahead[0]) ); if( yy_lookahead[i]!=iLookAhead ){ #ifdef YYFALLBACK YYCODETYPE iFallback; /* Fallback token */ if( iLookAhead<sizeof(yyFallback)/sizeof(yyFallback[0]) && (iFallback = yyFallback[iLookAhead])!=0 ){ #ifndef NDEBUG if( yyTraceFILE ){ fprintf(yyTraceFILE, "%sFALLBACK %s => %s\n", |
︙ | ︙ | |||
537 538 539 540 541 542 543 | if( stateno>YY_REDUCE_COUNT ){ return yy_default[stateno]; } #else assert( stateno<=YY_REDUCE_COUNT ); #endif i = yy_reduce_ofst[stateno]; | < | 532 533 534 535 536 537 538 539 540 541 542 543 544 545 | if( stateno>YY_REDUCE_COUNT ){ return yy_default[stateno]; } #else assert( stateno<=YY_REDUCE_COUNT ); #endif i = yy_reduce_ofst[stateno]; assert( iLookAhead!=YYNOCODE ); i += iLookAhead; #ifdef YYERRORSYMBOL if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){ return yy_default[stateno]; } #else |
︙ | ︙ |