/ Check-in [45c158b1]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Bring back test_regexp changes that were made when porting to Fossil. Also fix some bugs that were introduced by the Fossil port.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 45c158b1a015e0295244982e7a61ecc55cca8436
User & Date: drh 2013-01-03 19:34:46
Context
2013-01-04
14:06
Fix the zInit[] optimization of test_regexp.c when the initial string contains 3-byte UTF8 characters. check-in: 357231ec user: drh tags: trunk
2013-01-03
22:22
Merge recent trunk changes into the sessions branch. check-in: 7e068e39 user: drh tags: sessions
19:34
Bring back test_regexp changes that were made when porting to Fossil. Also fix some bugs that were introduced by the Fossil port. check-in: 45c158b1 user: drh tags: trunk
18:07
Improvements to column name resolution in queries with parenthesized FROM clauses. Also includes a fix for ticket [beba9cae6345a3]. check-in: 99127a66 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/test_regexp.c.

    83     83   ** the NFA.  The implementation is optimized for the common case where the
    84     84   ** number of active states is small.
    85     85   */
    86     86   typedef struct ReStateSet {
    87     87     unsigned nState;            /* Number of current states */
    88     88     ReStateNumber *aState;      /* Current states */
    89     89   } ReStateSet;
           90  +
           91  +/* An input string read one character at a time.
           92  +*/
           93  +typedef struct ReInput ReInput;
           94  +struct ReInput {
           95  +  const unsigned char *z;  /* All text */
           96  +  int i;                   /* Next byte to read */
           97  +  int mx;                  /* EOF when i>=mx */
           98  +};
    90     99   
    91    100   /* A compiled NFA (or an NFA that is in the process of being compiled) is
    92    101   ** an instance of the following object.
    93    102   */
    94         -typedef struct ReCompiled {
    95         -  const unsigned char *zIn;   /* Regular expression text */
          103  +typedef struct ReCompiled ReCompiled;
          104  +struct ReCompiled {
          105  +  ReInput sIn;                /* Regular expression text */
    96    106     const char *zErr;           /* Error message to return */
    97    107     char *aOp;                  /* Operators for the virtual machine */
    98    108     int *aArg;                  /* Arguments to each operator */
          109  +  unsigned (*xNextChar)(ReInput*);  /* Next character function */
    99    110     char zInit[12];             /* Initial text to match */
   100    111     int nInit;                  /* Number of characters in zInit */
   101    112     unsigned nState;            /* Number of entries in aOp[] and aArg[] */
   102    113     unsigned nAlloc;            /* Slots allocated for aOp[] and aArg[] */
   103         -} ReCompiled;
          114  +};
   104    115   
   105    116   /* Add a state to the given state set if it is not already there */
   106    117   static void re_add_state(ReStateSet *pSet, int newState){
   107    118     unsigned i;
   108    119     for(i=0; i<pSet->nState; i++) if( pSet->aState[i]==newState ) return;
   109    120     pSet->aState[pSet->nState++] = newState;
   110    121   }
   111    122   
   112    123   /* Extract the next unicode character from *pzIn and return it.  Advance
   113    124   ** *pzIn to the first byte past the end of the character returned.  To
   114    125   ** be clear:  this routine converts utf8 to unicode.  This routine is 
   115    126   ** optimized for the common case where the next character is a single byte.
   116    127   */
   117         -static unsigned re_next_char(const unsigned char **pzIn){
   118         -  unsigned c = **pzIn;
   119         -  if( c>0 ) (*pzIn)++;
          128  +static unsigned re_next_char(ReInput *p){
          129  +  unsigned c;
          130  +  if( p->i>=p->mx ) return 0;
          131  +  c = p->z[p->i++];
   120    132     if( c>0x80 ){
   121         -    if( (c&0xe0)==0xc0 && ((*pzIn)[0]&0xc0)==0x80 ){
   122         -      c = (c&0x1f)<<6 | ((*pzIn)[0]&0x3f);
   123         -      (*pzIn)++;
          133  +    if( (c&0xe0)==0xc0 && p->i<p->mx && (p->z[p->i]&0xc0)==0x80 ){
          134  +      c = (c&0x1f)<<6 | (p->z[p->i++]&0x3f);
   124    135         if( c<0x80 ) c = 0xfffd;
   125         -    }else if( (c&0xf0)==0xe0 && ((*pzIn)[0]&0xc0)==0x80
   126         -           && ((*pzIn)[1]&0xc0)==0x80 ){
   127         -      c = (c&0x0f)<<12 | (((*pzIn)[0]&0x3f)<<6) | ((*pzIn)[1]&0x3f);
   128         -      *pzIn += 2;
          136  +    }else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80
          137  +           && (p->z[p->i+1]&0xc0)==0x80 ){
          138  +      c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f);
          139  +      p->i += 2;
   129    140         if( c<0x3ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd;
   130         -    }else if( (c&0xf8)==0xf0 && ((*pzIn)[0]&0xc0)==0x80
   131         -           && ((*pzIn)[1]&0xc0)==0x80 && ((*pzIn)[2]&0xc0)==0x80 ){
   132         -      c = (c&0x07)<<18 | (((*pzIn)[0]&0x3f)<<12) | (((*pzIn)[1]&0x3f)<<6)
   133         -                       | ((*pzIn)[2]&0x3f);
   134         -      *pzIn += 3;
          141  +    }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80
          142  +           && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){
          143  +      c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6)
          144  +                       | (p->z[p->i+2]&0x3f);
          145  +      p->i += 3;
   135    146         if( c<0xffff ) c = 0xfffd;
   136    147       }else{
   137    148         c = 0xfffd;
   138    149       }
   139    150     }
   140    151     return c;
          152  +}
          153  +static unsigned re_next_char_nocase(ReInput *p){
          154  +  unsigned c = re_next_char(p);
          155  +  if( c>='A' && c<='Z' ) c += 'a' - 'A';
          156  +  return c;
   141    157   }
   142    158   
   143    159   /* Return true if c is a perl "word" character:  [A-Za-z0-9_] */
   144    160   static int re_word_char(int c){
   145    161     return (c>='0' && c<='9') || (c>='a' && c<='z')
   146    162         || (c>='A' && c<='Z') || c=='_';
   147    163   }
................................................................................
   155    171   static int re_space_char(int c){
   156    172     return c==' ' || c=='\t' || c=='\n' || c=='\v' || c=='\f';
   157    173   }
   158    174   
   159    175   /* Run a compiled regular expression on the zero-terminated input
   160    176   ** string zIn[].  Return true on a match and false if there is no match.
   161    177   */
   162         -static int re_exec(ReCompiled *pRe, const unsigned char *zIn){
          178  +int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){
   163    179     ReStateSet aStateSet[2], *pThis, *pNext;
   164    180     ReStateNumber aSpace[100];
   165    181     ReStateNumber *pToFree;
   166    182     unsigned int i = 0;
   167    183     unsigned int iSwap = 0;
   168    184     int c = RE_EOF+1;
   169    185     int cPrev = 0;
   170    186     int rc = 0;
   171         -  
          187  +  ReInput in;
          188  +
          189  +  in.z = zIn;
          190  +  in.i = 0;
          191  +  in.mx = nIn>=0 ? nIn : strlen((char*)zIn);
   172    192     if( pRe->nInit ){
   173    193       unsigned char x = pRe->zInit[0];
   174         -    while( zIn[0] && (zIn[0]!=x || memcmp(zIn, pRe->zInit, pRe->nInit)!=0) ){
   175         -      zIn++;
          194  +    while( in.i+pRe->nInit<=in.mx 
          195  +        && (zIn[in.i]!=x || memcmp(zIn+in.i, pRe->zInit, pRe->nInit)!=0)
          196  +    ){
          197  +      in.i++;
   176    198       }
   177         -    if( zIn[0]==0 ) return 0;
          199  +    if( in.i+pRe->nInit>in.mx ) return 0;
   178    200     }
   179    201     if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){
   180    202       pToFree = 0;
   181    203       aStateSet[0].aState = aSpace;
   182    204     }else{
   183         -    pToFree = malloc( sizeof(ReStateNumber)*2*pRe->nState );
          205  +    pToFree = sqlite3_malloc( sizeof(ReStateNumber)*2*pRe->nState );
   184    206       if( pToFree==0 ) return -1;
   185    207       aStateSet[0].aState = pToFree;
   186    208     }
   187    209     aStateSet[1].aState = &aStateSet[0].aState[pRe->nState];
   188    210     pNext = &aStateSet[1];
   189    211     pNext->nState = 0;
   190    212     re_add_state(pNext, 0);
   191    213     while( c!=RE_EOF && pNext->nState>0 ){
   192    214       cPrev = c;
   193         -    c = re_next_char(&zIn);
          215  +    c = pRe->xNextChar(&in);
   194    216       pThis = pNext;
   195    217       pNext = &aStateSet[iSwap];
   196    218       iSwap = 1 - iSwap;
   197    219       pNext->nState = 0;
   198    220       for(i=0; i<pThis->nState; i++){
   199    221         int x = pThis->aState[i];
   200    222         switch( pRe->aOp[x] ){
................................................................................
   246    268           }
   247    269           case RE_OP_GOTO: {
   248    270             re_add_state(pThis, x+pRe->aArg[x]);
   249    271             break;
   250    272           }
   251    273           case RE_OP_ACCEPT: {
   252    274             rc = 1;
   253         -          goto re_exec_end;
          275  +          goto re_match_end;
   254    276           }
   255    277           case RE_OP_CC_INC:
   256    278           case RE_OP_CC_EXC: {
   257    279             int j = 1;
   258    280             int n = pRe->aArg[x];
   259    281             int hit = 0;
   260    282             for(j=1; j>0 && j<n; j++){
................................................................................
   278    300           }
   279    301         }
   280    302       }
   281    303     }
   282    304     for(i=0; i<pNext->nState; i++){
   283    305       if( pRe->aOp[pNext->aState[i]]==RE_OP_ACCEPT ){ rc = 1; break; }
   284    306     }
   285         -re_exec_end:
   286         -  free(pToFree);
          307  +re_match_end:
          308  +  sqlite3_free(pToFree);
   287    309     return rc;
   288    310   }
   289    311   
   290    312   /* Resize the opcode and argument arrays for an RE under construction.
   291    313   */
   292    314   static int re_resize(ReCompiled *p, int N){
   293    315     char *aOp;
   294    316     int *aArg;
   295         -  aOp = realloc(p->aOp, N*sizeof(p->aOp[0]));
          317  +  aOp = sqlite3_realloc(p->aOp, N*sizeof(p->aOp[0]));
   296    318     if( aOp==0 ) return 1;
   297    319     p->aOp = aOp;
   298         -  aArg = realloc(p->aArg, N*sizeof(p->aArg[0]));
          320  +  aArg = sqlite3_realloc(p->aArg, N*sizeof(p->aArg[0]));
   299    321     if( aArg==0 ) return 1;
   300    322     p->aArg = aArg;
   301    323     p->nAlloc = N;
   302    324     return 0;
   303    325   }
   304    326   
   305    327   /* Insert a new opcode and argument into an RE under construction.  The
................................................................................
   355    377   /* A backslash character has been seen, read the next character and
   356    378   ** return its interpretation.
   357    379   */
   358    380   static unsigned re_esc_char(ReCompiled *p){
   359    381     static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]";
   360    382     static const char zTrans[] = "\a\f\n\r\t\v";
   361    383     int i, v = 0;
   362         -  char c = p->zIn[0];
   363         -  if( c=='u' ){
          384  +  char c;
          385  +  if( p->sIn.i>=p->sIn.mx ) return 0;
          386  +  c = p->sIn.z[p->sIn.i];
          387  +  if( c=='u' && p->sIn.i+5<p->sIn.mx ){
   364    388       v = 0;
   365         -    if( re_hex(p->zIn[1],&v)
   366         -     && re_hex(p->zIn[2],&v)
   367         -     && re_hex(p->zIn[3],&v)
   368         -     && re_hex(p->zIn[4],&v)
          389  +    const unsigned char *zIn = p->sIn.z + p->sIn.i;
          390  +    if( re_hex(zIn[1],&v)
          391  +     && re_hex(zIn[2],&v)
          392  +     && re_hex(zIn[3],&v)
          393  +     && re_hex(zIn[4],&v)
   369    394       ){
   370         -      p->zIn += 5;
          395  +      p->sIn.i += 5;
   371    396         return v;
   372    397       }
   373    398     }
   374    399     if( c=='x' ){
   375    400       v = 0;
   376         -    for(i=1; re_hex(p->zIn[i], &v); i++){}
          401  +    for(i=1; p->sIn.i<p->sIn.mx && re_hex(p->sIn.z[p->sIn.i+i], &v); i++){}
   377    402       if( i>1 ){
   378         -      p->zIn += i;
          403  +      p->sIn.i += i;
   379    404         return v;
   380    405       }
   381    406     }
   382    407     for(i=0; zEsc[i] && zEsc[i]!=c; i++){}
   383    408     if( zEsc[i] ){
   384    409       if( i<6 ) c = zTrans[i];
   385         -    p->zIn++;
          410  +    p->sIn.i++;
   386    411     }else{
   387    412       p->zErr = "unknown \\ escape";
   388    413     }
   389    414     return c;
   390    415   }
   391    416   
   392    417   /* Forward declaration */
   393    418   static const char *re_subcompile_string(ReCompiled*);
          419  +
          420  +/* Peek at the next byte of input */
          421  +static unsigned char rePeek(ReCompiled *p){
          422  +  return p->sIn.i<p->sIn.mx ? p->sIn.z[p->sIn.i] : 0;
          423  +}
   394    424   
   395    425   /* Compile RE text into a sequence of opcodes.  Continue up to the
   396    426   ** first unmatched ")" character, then return.  If an error is found,
   397    427   ** return a pointer to the error message string.
   398    428   */
   399    429   static const char *re_subcompile_re(ReCompiled *p){
   400    430     const char *zErr;
   401    431     int iStart, iEnd, iGoto;
   402    432     iStart = p->nState;
   403    433     zErr = re_subcompile_string(p);
   404    434     if( zErr ) return zErr;
   405         -  while( p->zIn[0]=='|' ){
          435  +  while( rePeek(p)=='|' ){
   406    436       iEnd = p->nState;
   407    437       re_insert(p, iStart, RE_OP_FORK, iEnd + 2 - iStart);
   408    438       iGoto = re_append(p, RE_OP_GOTO, 0);
   409         -    p->zIn++;
          439  +    p->sIn.i++;
   410    440       zErr = re_subcompile_string(p);
   411    441       if( zErr ) return zErr;
   412    442       p->aArg[iGoto] = p->nState - iGoto;
   413    443     }
   414    444     return 0;
   415    445   }
   416    446   
................................................................................
   419    449   ** to the error message if there is a problem.
   420    450   */
   421    451   static const char *re_subcompile_string(ReCompiled *p){
   422    452     int iPrev = -1;
   423    453     int iStart;
   424    454     unsigned c;
   425    455     const char *zErr;
   426         -  while( (c = re_next_char(&p->zIn))!=0 ){
          456  +  while( (c = p->xNextChar(&p->sIn))!=0 ){
   427    457       iStart = p->nState;
   428    458       switch( c ){
   429    459         case '|':
   430    460         case '$': 
   431    461         case ')': {
   432         -        p->zIn--;
          462  +        p->sIn.i--;
   433    463           return 0;
   434    464         }
   435    465         case '(': {
   436    466           zErr = re_subcompile_re(p);
   437    467           if( zErr ) return zErr;
   438         -        if( p->zIn[0]!=')' ) return "unmatched '('";
   439         -        p->zIn++;
          468  +        if( rePeek(p)!=')' ) return "unmatched '('";
          469  +        p->sIn.i++;
   440    470           break;
   441    471         }
   442    472         case '.': {
   443         -        if( p->zIn[0]=='*' ){
          473  +        if( rePeek(p)=='*' ){
   444    474             re_append(p, RE_OP_ANYSTAR, 0);
   445         -          p->zIn++;
          475  +          p->sIn.i++;
   446    476           }else{ 
   447    477             re_append(p, RE_OP_ANY, 0);
   448    478           }
   449    479           break;
   450    480         }
   451    481         case '*': {
   452    482           if( iPrev<0 ) return "'*' without operand";
................................................................................
   464    494           re_insert(p, iPrev, RE_OP_FORK, p->nState - iPrev+1);
   465    495           break;
   466    496         }
   467    497         case '{': {
   468    498           int m = 0, n = 0;
   469    499           int sz, j;
   470    500           if( iPrev<0 ) return "'{m,n}' without operand";
   471         -        while( (c=p->zIn[0])>='0' && c<='9' ){ m = m*10 + c - '0'; p->zIn++; }
          501  +        while( (c=rePeek(p))>='0' && c<='9' ){ m = m*10 + c - '0'; p->sIn.i++; }
   472    502           n = m;
   473    503           if( c==',' ){
   474         -          p->zIn++;
          504  +          p->sIn.i++;
   475    505             n = 0;
   476         -          while( (c=p->zIn[0])>='0' && c<='9' ){ n = n*10 + c - '0'; p->zIn++; }
          506  +          while( (c=rePeek(p))>='0' && c<='9' ){ n = n*10 + c-'0'; p->sIn.i++; }
   477    507           }
   478    508           if( c!='}' ) return "unmatched '{'";
   479    509           if( n>0 && n<m ) return "n less than m in '{m,n}'";
   480         -        p->zIn++;
          510  +        p->sIn.i++;
   481    511           sz = p->nState - iPrev;
   482    512           if( m==0 ){
   483    513             if( n==0 ) return "both m and n are zero in '{m,n}'";
   484    514             re_insert(p, iPrev, RE_OP_FORK, sz+1);
   485    515             n--;
   486    516           }else{
   487    517             for(j=1; j<m; j++) re_copy(p, iPrev, sz);
................................................................................
   493    523           if( n==0 && m>0 ){
   494    524             re_append(p, RE_OP_FORK, -sz);
   495    525           }
   496    526           break;
   497    527         }
   498    528         case '[': {
   499    529           int iFirst = p->nState;
   500         -        if( p->zIn[0]=='^' ){
          530  +        if( rePeek(p)=='^' ){
   501    531             re_append(p, RE_OP_CC_EXC, 0);
   502         -          p->zIn++;
          532  +          p->sIn.i++;
   503    533           }else{
   504    534             re_append(p, RE_OP_CC_INC, 0);
   505    535           }
   506         -        while( (c = re_next_char(&p->zIn))!=0 ){
   507         -          if( c=='[' && p->zIn[0]==':' ){
          536  +        while( (c = p->xNextChar(&p->sIn))!=0 ){
          537  +          if( c=='[' && rePeek(p)==':' ){
   508    538               return "POSIX character classes not supported";
   509    539             }
   510    540             if( c=='\\' ) c = re_esc_char(p);
   511         -          if( p->zIn[0]=='-' && p->zIn[1] ){
          541  +          if( rePeek(p)=='-' ){
   512    542               re_append(p, RE_OP_CC_RANGE, c);
   513         -            p->zIn++;
   514         -            c = re_next_char(&p->zIn);
          543  +            p->sIn.i++;
          544  +            c = p->xNextChar(&p->sIn);
   515    545               if( c=='\\' ) c = re_esc_char(p);
   516    546               re_append(p, RE_OP_CC_RANGE, c);
   517    547             }else{
   518    548               re_append(p, RE_OP_CC_VALUE, c);
   519    549             }
   520         -          if( p->zIn[0]==']' ){ p->zIn++; break; }
          550  +          if( rePeek(p)==']' ){ p->sIn.i++; break; }
   521    551           }
   522    552           if( c==0 ) return "unclosed '['";
   523    553           p->aArg[iFirst] = p->nState - iFirst;
   524    554           break;
   525    555         }
   526    556         case '\\': {
   527    557           int specialOp = 0;
   528         -        switch( p->zIn[0] ){
          558  +        switch( rePeek(p) ){
   529    559             case 'b': specialOp = RE_OP_BOUNDARY;   break;
   530    560             case 'd': specialOp = RE_OP_DIGIT;      break;
   531    561             case 'D': specialOp = RE_OP_NOTDIGIT;   break;
   532    562             case 's': specialOp = RE_OP_SPACE;      break;
   533    563             case 'S': specialOp = RE_OP_NOTSPACE;   break;
   534    564             case 'w': specialOp = RE_OP_WORD;       break;
   535    565             case 'W': specialOp = RE_OP_NOTWORD;    break;
   536    566           }
   537    567           if( specialOp ){
   538         -          p->zIn++;
          568  +          p->sIn.i++;
   539    569             re_append(p, specialOp, 0);
   540    570           }else{
   541    571             c = re_esc_char(p);
   542    572             re_append(p, RE_OP_MATCH, c);
   543    573           }
   544    574           break;
   545    575         }
................................................................................
   553    583     return 0;
   554    584   }
   555    585   
   556    586   /* Free and reclaim all the memory used by a previously compiled
   557    587   ** regular expression.  Applications should invoke this routine once
   558    588   ** for every call to re_compile() to avoid memory leaks.
   559    589   */
   560         -static void re_free(ReCompiled *pRe){
          590  +void re_free(ReCompiled *pRe){
   561    591     if( pRe ){
   562         -    free(pRe->aOp);
   563         -    free(pRe->aArg);
          592  +    sqlite3_free(pRe->aOp);
          593  +    sqlite3_free(pRe->aArg);
          594  +    sqlite3_free(pRe);
   564    595     }
   565    596   }
   566    597   
   567    598   /*
   568    599   ** Compile a textual regular expression in zIn[] into a compiled regular
   569         -** expression suitable for us by re_exec() and return a pointer to the
          600  +** expression suitable for use by re_match() and return a pointer to the
   570    601   ** compiled regular expression in *ppRe.  Return NULL on success or an
   571    602   ** error message if something goes wrong.
   572    603   */
   573         -static const char *re_compile(ReCompiled **ppRe, const char *zIn){
          604  +const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){
   574    605     ReCompiled *pRe;
   575    606     const char *zErr;
   576    607     int i, j;
   577    608   
   578    609     *ppRe = 0;
   579         -  pRe = malloc( sizeof(*pRe) );
          610  +  pRe = sqlite3_malloc( sizeof(*pRe) );
   580    611     if( pRe==0 ){
   581    612       return "out of memory";
   582    613     }
   583    614     memset(pRe, 0, sizeof(*pRe));
          615  +  pRe->xNextChar = noCase ? re_next_char_nocase : re_next_char;
   584    616     if( re_resize(pRe, 30) ){
   585    617       re_free(pRe);
   586    618       return "out of memory";
   587    619     }
   588    620     if( zIn[0]=='^' ){
   589    621       zIn++;
   590    622     }else{
   591    623       re_append(pRe, RE_OP_ANYSTAR, 0);
   592    624     }
   593         -  pRe->zIn = (unsigned char*)zIn;
          625  +  pRe->sIn.z = (unsigned char*)zIn;
          626  +  pRe->sIn.i = 0;
          627  +  pRe->sIn.mx = strlen((char*)pRe->sIn.z);
   594    628     zErr = re_subcompile_re(pRe);
   595    629     if( zErr ){
   596    630       re_free(pRe);
   597    631       return zErr;
   598    632     }
   599         -  if( pRe->zIn[0]=='$' && pRe->zIn[1]==0 ){
          633  +  if( rePeek(pRe)=='$' && pRe->sIn.i+1>=pRe->sIn.mx ){
   600    634       re_append(pRe, RE_OP_MATCH, RE_EOF);
   601    635       re_append(pRe, RE_OP_ACCEPT, 0);
   602    636       *ppRe = pRe;
   603         -  }else if( pRe->zIn[0]==0 ){
          637  +  }else if( pRe->sIn.i>=pRe->sIn.mx ){
   604    638       re_append(pRe, RE_OP_ACCEPT, 0);
   605    639       *ppRe = pRe;
   606    640     }else{
   607    641       re_free(pRe);
   608    642       return "unrecognized character";
   609    643     }
   610    644     if( pRe->aOp[0]==RE_OP_ANYSTAR ){
................................................................................
   619    653           pRe->zInit[j++] = 0xd0 | (x>>12);
   620    654           pRe->zInit[j++] = 0x80 | ((x>>6)&0x3f);
   621    655           pRe->zInit[j++] = 0x80 | ((x>>6)&0x3f);
   622    656         }else{
   623    657           break;
   624    658         }
   625    659       }
          660  +    if( j>0 && pRe->zInit[j-1]==0 ) j--;
   626    661       pRe->nInit = j;
   627    662     }
   628    663     return pRe->zErr;
   629    664   }
   630    665   
   631    666   /*
   632    667   ** Implementation of the regexp() SQL function.  This function implements
................................................................................
   647    682     const unsigned char *zStr;/* String being searched */
   648    683     const char *zErr;         /* Compile error message */
   649    684   
   650    685     pRe = sqlite3_get_auxdata(context, 0);
   651    686     if( pRe==0 ){
   652    687       zPattern = (const char*)sqlite3_value_text(argv[0]);
   653    688       if( zPattern==0 ) return;
   654         -    zErr = re_compile(&pRe, zPattern);
          689  +    zErr = re_compile(&pRe, zPattern, 0);
   655    690       if( zErr ){
          691  +      re_free(pRe);
   656    692         sqlite3_result_error(context, zErr, -1);
   657    693         return;
   658    694       }
   659    695       if( pRe==0 ){
   660    696         sqlite3_result_error_nomem(context);
   661    697         return;
   662    698       }
   663    699       sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free);
   664    700     }
   665    701     zStr = (const unsigned char*)sqlite3_value_text(argv[1]);
   666    702     if( zStr!=0 ){
   667         -    sqlite3_result_int(context, re_exec(pRe, zStr));
          703  +    sqlite3_result_int(context, re_match(pRe, zStr, -1));
   668    704     }
   669    705   }
   670    706   
   671    707   /*
   672    708   ** Invoke this routine in order to install the REGEXP function in an
   673    709   ** SQLite database connection.
   674    710   **