/ Check-in [c0d1b269]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix bugs in lemon associated with the change to a perfect hash table. (CVS 1113)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: c0d1b26966aeb445fea5792e5a9e93632e758c2a
User & Date: drh 2003-10-21 16:34:42
Context
2003-10-22
22:15
Comment changes to the lemon parser template. Change some sqliteMalloc() calls to sqliteMallocRaw() for speed. Update the website template. (CVS 1114) check-in: c637caf1 user: drh tags: trunk
2003-10-21
16:34
Fix bugs in lemon associated with the change to a perfect hash table. (CVS 1113) check-in: c0d1b269 user: drh tags: trunk
13:16
Convert lemon to use a single perfect hash table for storing the actions. This should make the resulting parser both smaller and faster. (CVS 1112) check-in: 4f955c00 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to tool/lemon.c.

   489    489   
   490    490     /* Make sure we have enough space to hold the expanded action table
   491    491     ** in the worst case.  The worst case occurs if the transaction set
   492    492     ** must be appended to the current action table
   493    493     */
   494    494     n = p->mxLookahead - p->mnLookahead + 1;
   495    495     if( p->nAction + n >= p->nActionAlloc ){
          496  +    int oldAlloc = p->nActionAlloc;
   496    497       p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20;
   497    498       p->aAction = realloc( p->aAction,
   498    499                             sizeof(p->aAction[0])*p->nActionAlloc);
   499    500       if( p->aAction==0 ){
   500    501         fprintf(stderr,"malloc failed\n");
   501    502         exit(1);
   502    503       }
   503         -    for(i=p->nAction; i<p->nActionAlloc; i++){
          504  +    for(i=oldAlloc; i<p->nActionAlloc; i++){
   504    505         p->aAction[i].lookahead = -1;
   505    506         p->aAction[i].action = -1;
   506    507       }
   507    508     }
   508    509   
   509    510     /* Scan the existing action table looking for an offset where we can
   510    511     ** insert the current transaction set.  Fall out of the loop when that
................................................................................
   516    517     for(i=0; i<p->nAction; i++){
   517    518       if( p->aAction[i].lookahead<0 ){
   518    519         for(j=0; j<p->nLookahead; j++){
   519    520           k = p->aLookahead[j].lookahead - p->mnLookahead + i;
   520    521           if( k<0 ) break;
   521    522           if( p->aAction[k].lookahead>=0 ) break;
   522    523         }
   523         -      if( j==p->nLookahead ) break;  /* Fits in empty slots */
          524  +      if( j<p->nLookahead ) continue;
          525  +      for(j=0; j<p->nAction; j++){
          526  +        if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break;
          527  +      }
          528  +      if( j==p->nAction ){
          529  +        break;  /* Fits in empty slots */
          530  +      }
   524    531       }else if( p->aAction[i].lookahead==p->mnLookahead ){
   525    532         if( p->aAction[i].action!=p->mnAction ) continue;
   526    533         for(j=0; j<p->nLookahead; j++){
   527    534           k = p->aLookahead[j].lookahead - p->mnLookahead + i;
   528    535           if( k<0 || k>=p->nAction ) break;
   529    536           if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break;
   530    537           if( p->aLookahead[j].action!=p->aAction[k].action ) break;
   531    538         }
   532    539         if( j<p->nLookahead ) continue;
   533    540         n = 0;
   534    541         for(j=0; j<p->nAction; j++){
   535         -        if( p->aAction[j].lookahead==j+i-p->mnLookahead ) n++;
          542  +        if( p->aAction[j].lookahead<0 ) continue;
          543  +        if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++;
   536    544         }
   537         -      if( n==p->nLookahead ) break;  /* Same as a prior transaction set */
          545  +      if( n==p->nLookahead ){
          546  +        break;  /* Same as a prior transaction set */
          547  +      }
   538    548       }
   539    549     }
   540    550     /* Insert transaction set at index i. */
   541    551     for(j=0; j<p->nLookahead; j++){
   542    552       k = p->aLookahead[j].lookahead - p->mnLookahead + i;
   543    553       p->aAction[k] = p->aLookahead[j];
   544    554       if( k>=p->nAction ) p->nAction = k+1;
................................................................................
  3120   3130       return "signed char";
  3121   3131     }else if( lwr>=-32767 && upr<32767 ){
  3122   3132       return "short";
  3123   3133     }else{
  3124   3134       return "int";
  3125   3135     }
  3126   3136   }
         3137  +
         3138  +/*
         3139  +** Each state contains a set of token transaction and a set of
         3140  +** nonterminal transactions.  Each of these sets makes an instance
         3141  +** of the following structure.  An array of these structures is used
         3142  +** to order the creation of entries in the yy_action[] table.
         3143  +*/
         3144  +struct axset {
         3145  +  struct state *stp;   /* A pointer to a state */
         3146  +  int isTkn;           /* True to use tokens.  False for non-terminals */
         3147  +  int nAction;         /* Number of actions */
         3148  +};
         3149  +
         3150  +/*
         3151  +** Compare to axset structures for sorting purposes
         3152  +*/
         3153  +static int axset_compare(const void *a, const void *b){
         3154  +  struct axset *p1 = (struct axset*)a;
         3155  +  struct axset *p2 = (struct axset*)b;
         3156  +  return p2->nAction - p1->nAction;
         3157  +}
  3127   3158   
  3128   3159   /* Generate C source code for the parser */
  3129   3160   void ReportTable(lemp, mhflag)
  3130   3161   struct lemon *lemp;
  3131   3162   int mhflag;     /* Output in makeheaders format if true */
  3132   3163   {
  3133   3164     FILE *out, *in;
................................................................................
  3137   3168     struct action *ap;
  3138   3169     struct rule *rp;
  3139   3170     struct acttab *pActtab;
  3140   3171     int i, j, n;
  3141   3172     char *name;
  3142   3173     int mnTknOfst, mxTknOfst;
  3143   3174     int mnNtOfst, mxNtOfst;
         3175  +  struct axset *ax;
  3144   3176   
  3145   3177     in = tplt_open(lemp);
  3146   3178     if( in==0 ) return;
  3147   3179     out = file_open(lemp,".c","w");
  3148   3180     if( out==0 ){
  3149   3181       fclose(in);
  3150   3182       return;
................................................................................
  3237   3269     **                     shifting terminals.
  3238   3270     **  yy_reduce_ofst[]   For each state, the offset into yy_action for
  3239   3271     **                     shifting non-terminals after a reduce.
  3240   3272     **  yy_default[]       Default action for each state.
  3241   3273     */
  3242   3274   
  3243   3275     /* Compute the actions on all states and count them up */
         3276  +  ax = malloc( sizeof(ax[0])*lemp->nstate*2 );
         3277  +  if( ax==0 ){
         3278  +    fprintf(stderr,"malloc failed\n");
         3279  +    exit(1);
         3280  +  }
  3244   3281     for(i=0; i<lemp->nstate; i++){
  3245   3282       stp = lemp->sorted[i];
  3246   3283       stp->nTknAct = stp->nNtAct = 0;
  3247   3284       stp->iDflt = lemp->nstate + lemp->nrule;
  3248   3285       stp->iTknOfst = NO_OFFSET;
  3249   3286       stp->iNtOfst = NO_OFFSET;
  3250   3287       for(ap=stp->ap; ap; ap=ap->next){
................................................................................
  3254   3291           }else if( ap->sp->index<lemp->nsymbol ){
  3255   3292             stp->nNtAct++;
  3256   3293           }else{
  3257   3294             stp->iDflt = compute_action(lemp, ap);
  3258   3295           }
  3259   3296         }
  3260   3297       }
         3298  +    ax[i*2].stp = stp;
         3299  +    ax[i*2].isTkn = 1;
         3300  +    ax[i*2].nAction = stp->nTknAct;
         3301  +    ax[i*2+1].stp = stp;
         3302  +    ax[i*2+1].isTkn = 0;
         3303  +    ax[i*2+1].nAction = stp->nNtAct;
  3261   3304     }
  3262   3305     mxTknOfst = mnTknOfst = 0;
  3263   3306     mxNtOfst = mnNtOfst = 0;
  3264   3307   
  3265         -  /* Compute the action table.  Do this in two passes.  The first
  3266         -  ** pass does all entries with two or more actions and the second
  3267         -  ** pass does all states with a single action.
         3308  +  /* Compute the action table.  In order to try to keep the size of the
         3309  +  ** action table to a minimum, the heuristic of placing the largest action
         3310  +  ** sets first is used.
  3268   3311     */
         3312  +  qsort(ax, lemp->nstate*2, sizeof(ax[0]), axset_compare);
  3269   3313     pActtab = acttab_alloc();
  3270         -  for(j=0; j<=1; j++){
  3271         -    for(i=0; i<lemp->nstate; i++){
  3272         -      stp = lemp->sorted[i];
  3273         -      if( (j==0 && stp->nTknAct>=2) || (j==1 && stp->nTknAct==1) ){
  3274         -        for(ap=stp->ap; ap; ap=ap->next){
  3275         -          int action;
  3276         -          if( ap->sp->index>=lemp->nterminal ) continue;
  3277         -          action = compute_action(lemp, ap);
  3278         -          if( action<0 ) continue;
  3279         -          acttab_action(pActtab, ap->sp->index, action);
  3280         -        }
  3281         -        stp->iTknOfst = acttab_insert(pActtab);
  3282         -        if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst;
  3283         -        if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst;
         3314  +  for(i=0; i<lemp->nstate*2 && ax[i].nAction>0; i++){
         3315  +    stp = ax[i].stp;
         3316  +    if( ax[i].isTkn ){
         3317  +      for(ap=stp->ap; ap; ap=ap->next){
         3318  +        int action;
         3319  +        if( ap->sp->index>=lemp->nterminal ) continue;
         3320  +        action = compute_action(lemp, ap);
         3321  +        if( action<0 ) continue;
         3322  +        acttab_action(pActtab, ap->sp->index, action);
         3323  +      }
         3324  +      stp->iTknOfst = acttab_insert(pActtab);
         3325  +      if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst;
         3326  +      if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst;
         3327  +    }else{
         3328  +      for(ap=stp->ap; ap; ap=ap->next){
         3329  +        int action;
         3330  +        if( ap->sp->index<lemp->nterminal ) continue;
         3331  +        if( ap->sp->index==lemp->nsymbol ) continue;
         3332  +        action = compute_action(lemp, ap);
         3333  +        if( action<0 ) continue;
         3334  +        acttab_action(pActtab, ap->sp->index, action);
  3284   3335         }
  3285         -      if( (j==0 && stp->nNtAct>=2) || (j==1 && stp->nNtAct==1) ){
  3286         -        for(ap=stp->ap; ap; ap=ap->next){
  3287         -          int action;
  3288         -          if( ap->sp->index<lemp->nterminal ) continue;
  3289         -          if( ap->sp->index==lemp->nsymbol ) continue;
  3290         -          action = compute_action(lemp, ap);
  3291         -          if( action<0 ) continue;
  3292         -          acttab_action(pActtab, ap->sp->index, action);
  3293         -        }
  3294         -        stp->iNtOfst = acttab_insert(pActtab);
  3295         -        if( stp->iNtOfst<mnNtOfst ) mnNtOfst = stp->iNtOfst;
  3296         -        if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst;
  3297         -      }
         3336  +      stp->iNtOfst = acttab_insert(pActtab);
         3337  +      if( stp->iNtOfst<mnNtOfst ) mnNtOfst = stp->iNtOfst;
         3338  +      if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst;
  3298   3339       }
  3299   3340     }
         3341  +  free(ax);
  3300   3342   
  3301   3343     /* Output the yy_action table */
  3302   3344     fprintf(out,"static YYACTIONTYPE yy_action[] = {\n"); lineno++;
  3303   3345     n = acttab_size(pActtab);
  3304   3346     for(i=j=0; i<n; i++){
  3305   3347       int action = acttab_yyaction(pActtab, i);
  3306   3348       if( action<0 ) action = lemp->nsymbol + lemp->nrule + 2;
         3349  +    if( j==0 ) fprintf(out," /* %5d */ ", i);
  3307   3350       fprintf(out, " %4d,", action);
  3308   3351       if( j==9 || i==n-1 ){
  3309   3352         fprintf(out, "\n"); lineno++;
  3310   3353         j = 0;
  3311   3354       }else{
  3312   3355         j++;
  3313   3356       }
................................................................................
  3315   3358     fprintf(out, "};\n"); lineno++;
  3316   3359   
  3317   3360     /* Output the yy_lookahead table */
  3318   3361     fprintf(out,"static YYCODETYPE yy_lookahead[] = {\n"); lineno++;
  3319   3362     for(i=j=0; i<n; i++){
  3320   3363       int la = acttab_yylookahead(pActtab, i);
  3321   3364       if( la<0 ) la = lemp->nsymbol;
         3365  +    if( j==0 ) fprintf(out," /* %5d */ ", i);
  3322   3366       fprintf(out, " %4d,", la);
  3323   3367       if( j==9 || i==n-1 ){
  3324   3368         fprintf(out, "\n"); lineno++;
  3325   3369         j = 0;
  3326   3370       }else{
  3327   3371         j++;
  3328   3372       }
................................................................................
  3335   3379             minimum_size_type(mnTknOfst-1, mxTknOfst)); lineno++;
  3336   3380     n = lemp->nstate;
  3337   3381     for(i=j=0; i<n; i++){
  3338   3382       int ofst;
  3339   3383       stp = lemp->sorted[i];
  3340   3384       ofst = stp->iTknOfst;
  3341   3385       if( ofst==NO_OFFSET ) ofst = mnTknOfst - 1;
         3386  +    if( j==0 ) fprintf(out," /* %5d */ ", i);
  3342   3387       fprintf(out, " %4d,", ofst);
  3343   3388       if( j==9 || i==n-1 ){
  3344   3389         fprintf(out, "\n"); lineno++;
  3345   3390         j = 0;
  3346   3391       }else{
  3347   3392         j++;
  3348   3393       }
................................................................................
  3355   3400             minimum_size_type(mnNtOfst-1, mxNtOfst)); lineno++;
  3356   3401     n = lemp->nstate;
  3357   3402     for(i=j=0; i<n; i++){
  3358   3403       int ofst;
  3359   3404       stp = lemp->sorted[i];
  3360   3405       ofst = stp->iNtOfst;
  3361   3406       if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1;
         3407  +    if( j==0 ) fprintf(out," /* %5d */ ", i);
  3362   3408       fprintf(out, " %4d,", ofst);
  3363   3409       if( j==9 || i==n-1 ){
  3364   3410         fprintf(out, "\n"); lineno++;
  3365   3411         j = 0;
  3366   3412       }else{
  3367   3413         j++;
  3368   3414       }
................................................................................
  3370   3416     fprintf(out, "};\n"); lineno++;
  3371   3417   
  3372   3418     /* Output the default action table */
  3373   3419     fprintf(out, "static YYACTIONTYPE yy_default[] = {\n"); lineno++;
  3374   3420     n = lemp->nstate;
  3375   3421     for(i=j=0; i<n; i++){
  3376   3422       stp = lemp->sorted[i];
         3423  +    if( j==0 ) fprintf(out," /* %5d */ ", i);
  3377   3424       fprintf(out, " %4d,", stp->iDflt);
  3378   3425       if( j==9 || i==n-1 ){
  3379   3426         fprintf(out, "\n"); lineno++;
  3380   3427         j = 0;
  3381   3428       }else{
  3382   3429         j++;
  3383   3430       }

Changes to tool/lempar.c.

    55     55   **                       defined, then do no error processing.
    56     56   */
    57     57   %%
    58     58   #define YY_NO_ACTION      (YYNSTATE+YYNRULE+2)
    59     59   #define YY_ACCEPT_ACTION  (YYNSTATE+YYNRULE+1)
    60     60   #define YY_ERROR_ACTION   (YYNSTATE+YYNRULE)
    61     61   
    62         -/* Next are that ables used to determine what action to take based on the
           62  +/* Next are that tables used to determine what action to take based on the
    63     63   ** current state and lookahead token.  These tables are used to implement
    64     64   ** functions that take a state number and lookahead value and return an
    65     65   ** action integer.  
    66     66   **
    67     67   ** The action integer is a number N between
    68     68   ** 0 and YYNSTATE-1 mean shift the look-ahead and go to state N.
    69     69   ** Numbers between YYNSTATE and YYNSTATE+YYNRULE-1 mean reduce by