Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Enhance Lemon so that it reorders the reduce rules such that rules without actions occur at the end and so that the first rule is number 0. This reduces the size of the jump table on the reduce switch, and helps the parser to run faster. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
d5712f21ec758ff096a7b1bb8ed4fc5e |
User & Date: | drh 2016-03-16 19:45:54.638 |
Context
2016-03-16
| ||
19:53 | Add a cast to an implict (size_t -> int) conversion in fts5_expr.c. (check-in: d9b5ff7aba user: dan tags: trunk) | |
19:45 | Enhance Lemon so that it reorders the reduce rules such that rules without actions occur at the end and so that the first rule is number 0. This reduces the size of the jump table on the reduce switch, and helps the parser to run faster. (check-in: d5712f21ec user: drh tags: trunk) | |
19:10 | Avoid a few unnecessary fstat()s on journal files. (check-in: dbf8470591 user: drh tags: trunk) | |
Changes
Changes to tool/lemon.c.
︙ | ︙ | |||
286 287 288 289 290 291 292 293 294 295 296 297 298 299 | const char **rhsalias; /* An alias for each RHS symbol (NULL if none) */ int line; /* Line number at which code begins */ const char *code; /* The code executed when this rule is reduced */ const char *codePrefix; /* Setup code before code[] above */ const char *codeSuffix; /* Breakdown code after code[] above */ struct symbol *precsym; /* Precedence symbol for this rule */ int index; /* An index number for this rule */ Boolean canReduce; /* True if this rule is ever reduced */ struct rule *nextlhs; /* Next rule with the same LHS */ struct rule *next; /* Next rule in the global list */ }; /* A configuration is a production rule of the grammar together with ** a mark (dot) showing how much of that rule has been processed so far. | > | 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 | const char **rhsalias; /* An alias for each RHS symbol (NULL if none) */ int line; /* Line number at which code begins */ const char *code; /* The code executed when this rule is reduced */ const char *codePrefix; /* Setup code before code[] above */ const char *codeSuffix; /* Breakdown code after code[] above */ struct symbol *precsym; /* Precedence symbol for this rule */ int index; /* An index number for this rule */ int iRule; /* Rule number as used in the generated tables */ Boolean canReduce; /* True if this rule is ever reduced */ struct rule *nextlhs; /* Next rule with the same LHS */ struct rule *next; /* Next rule in the global list */ }; /* A configuration is a production rule of the grammar together with ** a mark (dot) showing how much of that rule has been processed so far. |
︙ | ︙ | |||
368 369 370 371 372 373 374 375 376 377 378 379 380 381 | /* The state vector for the entire parser generator is recorded as ** follows. (LEMON uses no global variables and makes little use of ** static variables. Fields in the following structure can be thought ** of as begin global variables in the program.) */ struct lemon { struct state **sorted; /* Table of states sorted by state number */ struct rule *rule; /* List of all rules */ int nstate; /* Number of states */ int nxstate; /* nstate with tail degenerate states removed */ int nrule; /* Number of rules */ int nsymbol; /* Number of terminal and nonterminal symbols */ int nterminal; /* Number of terminal symbols */ struct symbol **symbols; /* Sorted array of pointers to symbols */ int errorcnt; /* Number of errors */ | > | 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 | /* The state vector for the entire parser generator is recorded as ** follows. (LEMON uses no global variables and makes little use of ** static variables. Fields in the following structure can be thought ** of as begin global variables in the program.) */ struct lemon { struct state **sorted; /* Table of states sorted by state number */ struct rule *rule; /* List of all rules */ struct rule *startRule; /* First rule */ int nstate; /* Number of states */ int nxstate; /* nstate with tail degenerate states removed */ int nrule; /* Number of rules */ int nsymbol; /* Number of terminal and nonterminal symbols */ int nterminal; /* Number of terminal symbols */ struct symbol **symbols; /* Sorted array of pointers to symbols */ int errorcnt; /* Number of errors */ |
︙ | ︙ | |||
854 855 856 857 858 859 860 | /* Find the start symbol */ if( lemp->start ){ sp = Symbol_find(lemp->start); if( sp==0 ){ ErrorMsg(lemp->filename,0, "The specified start symbol \"%s\" is not \ in a nonterminal of the grammar. \"%s\" will be used as the start \ | | | | | 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 | /* Find the start symbol */ if( lemp->start ){ sp = Symbol_find(lemp->start); if( sp==0 ){ ErrorMsg(lemp->filename,0, "The specified start symbol \"%s\" is not \ in a nonterminal of the grammar. \"%s\" will be used as the start \ symbol instead.",lemp->start,lemp->startRule->lhs->name); lemp->errorcnt++; sp = lemp->startRule->lhs; } }else{ sp = lemp->startRule->lhs; } /* Make sure the start symbol doesn't occur on the right-hand side of ** any rule. Report an error if it does. (YACC would generate a new ** start symbol in this case.) */ for(rp=lemp->rule; rp; rp=rp->next){ int i; |
︙ | ︙ | |||
1113 1114 1115 1116 1117 1118 1119 | } } } /* Add the accepting token */ if( lemp->start ){ sp = Symbol_find(lemp->start); | | | | 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 | } } } /* Add the accepting token */ if( lemp->start ){ sp = Symbol_find(lemp->start); if( sp==0 ) sp = lemp->startRule->lhs; }else{ sp = lemp->startRule->lhs; } /* Add to the first state (which is always the starting state of the ** finite state machine) an action to ACCEPT if the lookahead is the ** start nonterminal. */ Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0); /* Resolve conflicts */ |
︙ | ︙ | |||
1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 | static void handle_T_option(char *z){ user_templatename = (char *) malloc( lemonStrlen(z)+1 ); if( user_templatename==0 ){ memory_error(); } lemon_strcpy(user_templatename, z); } /* forward reference */ static const char *minimum_size_type(int lwr, int upr, int *pnByte); /* Print a single line of the "Parser Stats" output */ static void stats_line(const char *zLabel, int iValue){ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 | static void handle_T_option(char *z){ user_templatename = (char *) malloc( lemonStrlen(z)+1 ); if( user_templatename==0 ){ memory_error(); } lemon_strcpy(user_templatename, z); } /* Merge together to lists of rules order by rule.iRule */ static struct rule *Rule_merge(struct rule *pA, struct rule *pB){ struct rule *pFirst = 0; struct rule **ppPrev = &pFirst; while( pA && pB ){ if( pA->iRule<pB->iRule ){ *ppPrev = pA; ppPrev = &pA->next; pA = pA->next; }else{ *ppPrev = pB; ppPrev = &pB->next; pB = pB->next; } } if( pA ){ *ppPrev = pA; }else{ *ppPrev = pB; } return pFirst; } /* ** Sort a list of rules in order of increasing iRule value */ static struct rule *Rule_sort(struct rule *rp){ int i; struct rule *pNext; struct rule *x[32]; memset(x, 0, sizeof(x)); while( rp ){ pNext = rp->next; rp->next = 0; for(i=0; i<sizeof(x)/sizeof(x[0]) && x[i]; i++){ rp = Rule_merge(x[i], rp); x[i] = 0; } x[i] = rp; rp = pNext; } rp = 0; for(i=0; i<sizeof(x)/sizeof(x[0]); i++){ rp = Rule_merge(x[i], rp); } return rp; } /* forward reference */ static const char *minimum_size_type(int lwr, int upr, int *pnByte); /* Print a single line of the "Parser Stats" output */ static void stats_line(const char *zLabel, int iValue){ |
︙ | ︙ | |||
1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 | {OPT_FSTR, "T", (char*)handle_T_option, "Specify a template file."}, {OPT_FSTR, "W", 0, "Ignored. (Placeholder for '-W' compiler options.)"}, {OPT_FLAG,0,0,0} }; int i; int exitcode; struct lemon lem; OptInit(argv,options,stderr); if( version ){ printf("Lemon version 1.0\n"); exit(0); } if( OptNArgs()!=1 ){ | > | 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 | {OPT_FSTR, "T", (char*)handle_T_option, "Specify a template file."}, {OPT_FSTR, "W", 0, "Ignored. (Placeholder for '-W' compiler options.)"}, {OPT_FLAG,0,0,0} }; int i; int exitcode; struct lemon lem; struct rule *rp; OptInit(argv,options,stderr); if( version ){ printf("Lemon version 1.0\n"); exit(0); } if( OptNArgs()!=1 ){ |
︙ | ︙ | |||
1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 | qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol*), Symbolcmpp); for(i=0; i<lem.nsymbol; i++) lem.symbols[i]->index = i; while( lem.symbols[i-1]->type==MULTITERMINAL ){ i--; } assert( strcmp(lem.symbols[i-1]->name,"{default}")==0 ); lem.nsymbol = i - 1; for(i=1; ISUPPER(lem.symbols[i]->name[0]); i++); lem.nterminal = i; /* Generate a reprint of the grammar, if requested on the command line */ if( rpflag ){ Reprint(&lem); }else{ /* Initialize the size for all follow and first sets */ SetSize(lem.nterminal+1); | > > > > > > > > > > | 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 | qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol*), Symbolcmpp); for(i=0; i<lem.nsymbol; i++) lem.symbols[i]->index = i; while( lem.symbols[i-1]->type==MULTITERMINAL ){ i--; } assert( strcmp(lem.symbols[i-1]->name,"{default}")==0 ); lem.nsymbol = i - 1; for(i=1; ISUPPER(lem.symbols[i]->name[0]); i++); lem.nterminal = i; /* Assign sequential rule numbers */ for(i=0, rp=lem.rule; rp; rp=rp->next){ rp->iRule = rp->code ? i++ : -1; } for(rp=lem.rule; rp; rp=rp->next){ if( rp->iRule<0 ) rp->iRule = i++; } lem.startRule = lem.rule; lem.rule = Rule_sort(lem.rule); /* Generate a reprint of the grammar, if requested on the command line */ if( rpflag ){ Reprint(&lem); }else{ /* Initialize the size for all follow and first sets */ SetSize(lem.nterminal+1); |
︙ | ︙ | |||
3050 3051 3052 3053 3054 3055 3056 | case SHIFT: { struct state *stp = ap->x.stp; fprintf(fp,"%*s shift %-7d",indent,ap->sp->name,stp->statenum); break; } case REDUCE: { struct rule *rp = ap->x.rp; | | | | | | 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 | case SHIFT: { struct state *stp = ap->x.stp; fprintf(fp,"%*s shift %-7d",indent,ap->sp->name,stp->statenum); break; } case REDUCE: { struct rule *rp = ap->x.rp; fprintf(fp,"%*s reduce %-7d",indent,ap->sp->name,rp->iRule); RulePrint(fp, rp, -1); break; } case SHIFTREDUCE: { struct rule *rp = ap->x.rp; fprintf(fp,"%*s shift-reduce %-7d",indent,ap->sp->name,rp->iRule); RulePrint(fp, rp, -1); break; } case ACCEPT: fprintf(fp,"%*s accept",indent,ap->sp->name); break; case ERROR: fprintf(fp,"%*s error",indent,ap->sp->name); break; case SRCONFLICT: case RRCONFLICT: fprintf(fp,"%*s reduce %-7d ** Parsing conflict **", indent,ap->sp->name,ap->x.rp->iRule); break; case SSCONFLICT: fprintf(fp,"%*s shift %-7d ** Parsing conflict **", indent,ap->sp->name,ap->x.stp->statenum); break; case SH_RESOLVED: if( showPrecedenceConflict ){ fprintf(fp,"%*s shift %-7d -- dropped by precedence", indent,ap->sp->name,ap->x.stp->statenum); }else{ result = 0; } break; case RD_RESOLVED: if( showPrecedenceConflict ){ fprintf(fp,"%*s reduce %-7d -- dropped by precedence", indent,ap->sp->name,ap->x.rp->iRule); }else{ result = 0; } break; case NOT_USED: result = 0; break; |
︙ | ︙ | |||
3117 3118 3119 3120 3121 3122 3123 | stp = lemp->sorted[i]; fprintf(fp,"State %d:\n",stp->statenum); if( lemp->basisflag ) cfp=stp->bp; else cfp=stp->cfp; while( cfp ){ char buf[20]; if( cfp->dot==cfp->rp->nrhs ){ | | | 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 | stp = lemp->sorted[i]; fprintf(fp,"State %d:\n",stp->statenum); if( lemp->basisflag ) cfp=stp->bp; else cfp=stp->cfp; while( cfp ){ char buf[20]; if( cfp->dot==cfp->rp->nrhs ){ lemon_sprintf(buf,"(%d)",cfp->rp->iRule); fprintf(fp," %5s ",buf); }else{ fprintf(fp," "); } ConfigPrint(fp,cfp); fprintf(fp,"\n"); #if 0 |
︙ | ︙ | |||
3218 3219 3220 3221 3222 3223 3224 | ** Return negative if no action should be generated. */ PRIVATE int compute_action(struct lemon *lemp, struct action *ap) { int act; switch( ap->type ){ case SHIFT: act = ap->x.stp->statenum; break; | | | | 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 | ** Return negative if no action should be generated. */ PRIVATE int compute_action(struct lemon *lemp, struct action *ap) { int act; switch( ap->type ){ case SHIFT: act = ap->x.stp->statenum; break; case SHIFTREDUCE: act = ap->x.rp->iRule + lemp->nstate; break; case REDUCE: act = ap->x.rp->iRule + lemp->nstate+lemp->nrule; break; case ERROR: act = lemp->nstate + lemp->nrule*2; break; case ACCEPT: act = lemp->nstate + lemp->nrule*2 + 1; break; default: act = -1; break; } return act; } |
︙ | ︙ | |||
4237 4238 4239 4240 4241 4242 4243 | tplt_xfer(lemp->name,in,out,&lineno); /* Generate a table containing a text string that describes every ** rule in the rule set of the grammar. This information is used ** when tracing REDUCE actions. */ for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ | | | 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 | tplt_xfer(lemp->name,in,out,&lineno); /* Generate a table containing a text string that describes every ** rule in the rule set of the grammar. This information is used ** when tracing REDUCE actions. */ for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ assert( rp->iRule==i ); fprintf(out," /* %3d */ \"", i); writeRuleText(out, rp); fprintf(out,"\",\n"); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); /* Generate code which executes every time a symbol is popped from |
︙ | ︙ | |||
4333 4334 4335 4336 4337 4338 4339 | fprintf(out," YYMINORTYPE yylhsminor;\n"); lineno++; } /* First output rules other than the default: rule */ for(rp=lemp->rule; rp; rp=rp->next){ struct rule *rp2; /* Other rules with the same action */ if( rp->code==0 ) continue; if( rp->code[0]=='\n' && rp->code[1]==0 ) continue; /* Will be default: */ | | | | | | | 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 | fprintf(out," YYMINORTYPE yylhsminor;\n"); lineno++; } /* First output rules other than the default: rule */ for(rp=lemp->rule; rp; rp=rp->next){ struct rule *rp2; /* Other rules with the same action */ if( rp->code==0 ) continue; if( rp->code[0]=='\n' && rp->code[1]==0 ) continue; /* Will be default: */ fprintf(out," case %d: /* ", rp->iRule); writeRuleText(out, rp); fprintf(out, " */\n"); lineno++; for(rp2=rp->next; rp2; rp2=rp2->next){ if( rp2->code==rp->code ){ fprintf(out," case %d: /* ", rp2->iRule); writeRuleText(out, rp2); fprintf(out," */ yytestcase(yyruleno==%d);\n", rp2->iRule); lineno++; rp2->code = 0; } } emit_code(out,rp,lemp,&lineno); fprintf(out," break;\n"); lineno++; rp->code = 0; } /* Finally, output the default: rule. We choose as the default: all ** empty actions. */ fprintf(out," default:\n"); lineno++; for(rp=lemp->rule; rp; rp=rp->next){ if( rp->code==0 ) continue; assert( rp->code[0]=='\n' && rp->code[1]==0 ); fprintf(out," /* (%d) ", rp->iRule); writeRuleText(out, rp); fprintf(out, " */ yytestcase(yyruleno==%d);\n", rp->iRule); lineno++; } fprintf(out," break;\n"); lineno++; tplt_xfer(lemp->name,in,out,&lineno); /* Generate code which executes if a parse fails */ tplt_print(out,lemp,lemp->failure,&lineno); tplt_xfer(lemp->name,in,out,&lineno); |
︙ | ︙ |
Changes to tool/lempar.c.
︙ | ︙ | |||
414 415 416 417 418 419 420 | } #endif /* ** Find the appropriate action for a parser given the terminal ** look-ahead token iLookAhead. */ | | | 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 | } #endif /* ** Find the appropriate action for a parser given the terminal ** look-ahead token iLookAhead. */ static unsigned int yy_find_shift_action( yyParser *pParser, /* The parser */ YYCODETYPE iLookAhead /* The look-ahead token */ ){ int i; int stateno = pParser->yystack[pParser->yyidx].stateno; if( stateno>=YY_MIN_REDUCE ) return stateno; |
︙ | ︙ | |||
602 603 604 605 606 607 608 | /* ** Perform a reduce action and the shift that must immediately ** follow the reduce. */ static void yy_reduce( yyParser *yypParser, /* The parser */ | | < | | 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 | /* ** Perform a reduce action and the shift that must immediately ** follow the reduce. */ static void yy_reduce( yyParser *yypParser, /* The parser */ unsigned int yyruleno /* Number of the rule by which to reduce */ ){ int yygoto; /* The next state */ int yyact; /* The next action */ yyStackEntry *yymsp; /* The top of the parser's stack */ int yysize; /* Amount to pop the stack */ ParseARG_FETCH; yymsp = &yypParser->yystack[yypParser->yyidx]; #ifndef NDEBUG if( yyTraceFILE && yyruleno<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ){ yysize = yyRuleInfo[yyruleno].nrhs; fprintf(yyTraceFILE, "%sReduce [%s], go to state %d.\n", yyTracePrompt, yyRuleName[yyruleno], yymsp[-yysize].stateno); } #endif /* NDEBUG */ /* Check that the stack is large enough to grow by a single entry |
︙ | ︙ | |||
657 658 659 660 661 662 663 | ** #line <lineno> <thisfile> ** break; */ /********** Begin reduce actions **********************************************/ %% /********** End reduce actions ************************************************/ }; | | | 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 | ** #line <lineno> <thisfile> ** break; */ /********** Begin reduce actions **********************************************/ %% /********** End reduce actions ************************************************/ }; assert( yyruleno<sizeof(yyRuleInfo)/sizeof(yyRuleInfo[0]) ); yygoto = yyRuleInfo[yyruleno].lhs; yysize = yyRuleInfo[yyruleno].nrhs; yyact = yy_find_reduce_action(yymsp[-yysize].stateno,(YYCODETYPE)yygoto); if( yyact <= YY_MAX_SHIFTREDUCE ){ if( yyact>YY_MAX_SHIFT ) yyact += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE; yypParser->yyidx -= yysize - 1; yymsp -= yysize-1; |
︙ | ︙ | |||
761 762 763 764 765 766 767 | void Parse( void *yyp, /* The parser */ int yymajor, /* The major token code number */ ParseTOKENTYPE yyminor /* The value for the token */ ParseARG_PDECL /* Optional %extra_argument parameter */ ){ YYMINORTYPE yyminorunion; | | | 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 | void Parse( void *yyp, /* The parser */ int yymajor, /* The major token code number */ ParseTOKENTYPE yyminor /* The value for the token */ ParseARG_PDECL /* Optional %extra_argument parameter */ ){ YYMINORTYPE yyminorunion; unsigned int yyact; /* The parser action. */ #if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY) int yyendofinput; /* True if we are at the end of input */ #endif #ifdef YYERRORSYMBOL int yyerrorhit = 0; /* True if yymajor has invoked an error */ #endif yyParser *yypParser; /* The parser */ |
︙ | ︙ |