/ Check-in [a4c890b0]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Refactor the sqlite3_normalized_sql() implementation. This is a work-in-progress. There are still issues.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | normalize-refactor
Files: files | file ages | folders
SHA3-256:a4c890b0af9786295e6df05022009d8946550adb873535c610be805c2b7a4083
User & Date: drh 2018-12-10 16:00:57
Context
2018-12-10
16:49
Fix issues with the new normalizer. Leaf check-in: 057d7d40 user: drh tags: normalize-refactor
16:00
Refactor the sqlite3_normalized_sql() implementation. This is a work-in-progress. There are still issues. check-in: a4c890b0 user: drh tags: normalize-refactor
01:48
Add support for the VACUUM INTO command. check-in: 77f150b8 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/prepare.c.

   705    705     sqlite3BtreeLeaveAll(db);
   706    706     rc = sqlite3ApiExit(db, rc);
   707    707     assert( (rc&db->errMask)==rc );
   708    708     sqlite3_mutex_leave(db->mutex);
   709    709     return rc;
   710    710   }
   711    711   
   712         -#ifdef SQLITE_ENABLE_NORMALIZE
   713         -
   714         -/*
   715         -** Attempt to estimate the final output buffer size needed for the fully
   716         -** normalized version of the specified SQL string.  This should take into
   717         -** account any potential expansion that could occur (e.g. via IN clauses
   718         -** being expanded, etc).  This size returned is the total number of bytes
   719         -** including the NUL terminator.
   720         -*/
   721         -static int estimateNormalizedSize(
   722         -  const char *zSql, /* The original SQL string */
   723         -  int nSql          /* Length of original SQL string */
   724         -){
   725         -  int nOut = nSql + 4;
   726         -  const char *z = zSql;
   727         -  while( nOut<nSql*5 ){
   728         -    while( z[0]!=0 && z[0]!='I' && z[0]!='i' ){ z++; }
   729         -    if( z[0]==0 ) break;
   730         -    z++;
   731         -    if( z[0]!='N' && z[0]!='n' ) break;
   732         -    z++;
   733         -    while( sqlite3Isspace(z[0]) ){ z++; }
   734         -    if( z[0]!='(' ) break;
   735         -    z++;
   736         -    nOut += 5; /* ?,?,? */
   737         -  }
   738         -  return nOut;
   739         -}
   740         -
   741         -/*
   742         -** Copy the current token into the output buffer while dealing with quoted
   743         -** identifiers.  By default, all letters will be converted into lowercase.
   744         -** If the bUpper flag is set, uppercase will be used.  The piOut argument
   745         -** will be used to update the target index into the output string.
   746         -*/
   747         -static void copyNormalizedToken(
   748         -  const char *zSql, /* The original SQL string */
   749         -  int iIn,          /* Current index into the original SQL string */
   750         -  int nToken,       /* Number of bytes in the current token */
   751         -  int tokenFlags,   /* Flags returned by the tokenizer */
   752         -  char *zOut,       /* The output string */
   753         -  int *piOut        /* Pointer to target index into the output string */
   754         -){
   755         -  int bQuoted = tokenFlags & SQLITE_TOKEN_QUOTED;
   756         -  int bKeyword = tokenFlags & SQLITE_TOKEN_KEYWORD;
   757         -  int j = *piOut, k = 0;
   758         -  for(; k<nToken; k++){
   759         -    if( bQuoted ){
   760         -      if( k==0 && iIn>0 ){
   761         -        zOut[j++] = '"';
   762         -        continue;
   763         -      }else if( k==nToken-1 ){
   764         -        zOut[j++] = '"';
   765         -        continue;
   766         -      }
   767         -    }
   768         -    if( bKeyword ){
   769         -      zOut[j++] = sqlite3Toupper(zSql[iIn+k]);
   770         -    }else{
   771         -      zOut[j++] = sqlite3Tolower(zSql[iIn+k]);
   772         -    }
   773         -  }
   774         -  *piOut = j;
   775         -}
   776         -
   777         -/*
   778         -** Compute a normalization of the SQL given by zSql[0..nSql-1].  Return
   779         -** the normalization in space obtained from sqlite3DbMalloc().  Or return
   780         -** NULL if anything goes wrong or if zSql is NULL.
   781         -*/
   782         -char *sqlite3Normalize(
   783         -  Vdbe *pVdbe,      /* VM being reprepared */
   784         -  const char *zSql, /* The original SQL string */
   785         -  int nSql          /* Size of the input string in bytes */
   786         -){
   787         -  sqlite3 *db;           /* Database handle. */
   788         -  char *z;               /* The output string */
   789         -  int nZ;                /* Size of the output string in bytes */
   790         -  int i;                 /* Next character to read from zSql[] */
   791         -  int j;                 /* Next character to fill in on z[] */
   792         -  int tokenType = 0;     /* Type of the next token */
   793         -  int prevTokenType = 0; /* Type of the previous token, except spaces */
   794         -  int n;                 /* Size of the next token */
   795         -  int nParen = 0;        /* Nesting level of parenthesis */
   796         -  int iStartIN = 0;      /* Start of RHS of IN operator in z[] */
   797         -  int nParenAtIN = 0;    /* Value of nParent at start of RHS of IN operator */
   798         -
   799         -  db = sqlite3VdbeDb(pVdbe);
   800         -  assert( db!=0 );
   801         -  if( zSql==0 ) return 0;
   802         -  nZ = estimateNormalizedSize(zSql, nSql);
   803         -  z = sqlite3DbMallocRawNN(db, nZ);
   804         -  if( z==0 ) goto normalizeError;
   805         -  for(i=j=0; i<nSql && zSql[i]; i+=n){
   806         -    int flags = 0;
   807         -    if( tokenType!=TK_SPACE ) prevTokenType = tokenType;
   808         -    n = sqlite3GetTokenNormalized((unsigned char*)zSql+i, &tokenType, &flags);
   809         -    switch( tokenType ){
   810         -      case TK_SPACE: {
   811         -        break;
   812         -      }
   813         -      case TK_ILLEGAL: {
   814         -        goto normalizeError;
   815         -      }
   816         -      case TK_STRING:
   817         -      case TK_INTEGER:
   818         -      case TK_FLOAT:
   819         -      case TK_VARIABLE:
   820         -      case TK_BLOB: {
   821         -        z[j++] = '?';
   822         -        break;
   823         -      }
   824         -      case TK_LP:
   825         -      case TK_RP: {
   826         -        if( tokenType==TK_LP ){
   827         -          nParen++;
   828         -          if( prevTokenType==TK_IN ){
   829         -            iStartIN = j;
   830         -            nParenAtIN = nParen;
   831         -          }
   832         -        }else{
   833         -          if( iStartIN>0 && nParen==nParenAtIN ){
   834         -            assert( iStartIN+6<nZ );
   835         -            memcpy(z+iStartIN+1, "?,?,?", 5);
   836         -            j = iStartIN+6;
   837         -            assert( nZ-1-j>=0 );
   838         -            assert( nZ-1-j<nZ );
   839         -            memset(z+j, 0, nZ-1-j);
   840         -            iStartIN = 0;
   841         -          }
   842         -          nParen--;
   843         -        }
   844         -        assert( nParen>=0 );
   845         -        /* Fall through */
   846         -      }
   847         -      case TK_MINUS:
   848         -      case TK_SEMI:
   849         -      case TK_PLUS:
   850         -      case TK_STAR:
   851         -      case TK_SLASH:
   852         -      case TK_REM:
   853         -      case TK_EQ:
   854         -      case TK_LE:
   855         -      case TK_NE:
   856         -      case TK_LSHIFT:
   857         -      case TK_LT:
   858         -      case TK_RSHIFT:
   859         -      case TK_GT:
   860         -      case TK_GE:
   861         -      case TK_BITOR:
   862         -      case TK_CONCAT:
   863         -      case TK_COMMA:
   864         -      case TK_BITAND:
   865         -      case TK_BITNOT:
   866         -      case TK_DOT:
   867         -      case TK_IN:
   868         -      case TK_IS:
   869         -      case TK_NOT:
   870         -      case TK_NULL:
   871         -      case TK_ID: {
   872         -        if( tokenType==TK_NULL ){
   873         -          if( prevTokenType==TK_IS || prevTokenType==TK_NOT ){
   874         -            /* NULL is a keyword in this case, not a literal value */
   875         -          }else{
   876         -            /* Here the NULL is a literal value */
   877         -            z[j++] = '?';
   878         -            break;
   879         -          }
   880         -        }
   881         -        if( j>0 && sqlite3IsIdChar(z[j-1]) && sqlite3IsIdChar(zSql[i]) ){
   882         -          z[j++] = ' ';
   883         -        }
   884         -        if( tokenType==TK_ID ){
   885         -          if( zSql[i]=='"'
   886         -           && sqlite3VdbeUsesDoubleQuotedString(db,pVdbe,zSql+i,n)
   887         -          ){
   888         -            z[j++] = '?';
   889         -            break;
   890         -          }
   891         -          if( nParen==nParenAtIN ) iStartIN = 0;
   892         -        }
   893         -        copyNormalizedToken(zSql, i, n, flags, z, &j);
   894         -        break;
   895         -      }
   896         -    }
   897         -  }
   898         -  assert( j<nZ && "one" );
   899         -  while( j>0 && z[j-1]==' ' ){ j--; }
   900         -  if( j>0 && z[j-1]!=';' ){ z[j++] = ';'; }
   901         -  z[j] = 0;
   902         -  assert( j<nZ && "two" );
   903         -  return z;
   904         -
   905         -normalizeError:
   906         -  sqlite3DbFree(db, z);
   907         -  return 0;
   908         -}
   909         -#endif /* SQLITE_ENABLE_NORMALIZE */
   910    712   
   911    713   /*
   912    714   ** Rerun the compilation of a statement after a schema change.
   913    715   **
   914    716   ** If the statement is successfully recompiled, return SQLITE_OK. Otherwise,
   915    717   ** if the statement cannot be recompiled because another connection has
   916    718   ** locked the sqlite3_master table, return SQLITE_LOCKED. If any other error

Changes to src/sqliteInt.h.

  4251   4251   #endif
  4252   4252   void sqlite3RootPageMoved(sqlite3*, int, int, int);
  4253   4253   void sqlite3Reindex(Parse*, Token*, Token*);
  4254   4254   void sqlite3AlterFunctions(void);
  4255   4255   void sqlite3AlterRenameTable(Parse*, SrcList*, Token*);
  4256   4256   void sqlite3AlterRenameColumn(Parse*, SrcList*, Token*, Token*);
  4257   4257   int sqlite3GetToken(const unsigned char *, int *);
  4258         -#ifdef SQLITE_ENABLE_NORMALIZE
  4259         -int sqlite3GetTokenNormalized(const unsigned char *, int *, int *);
  4260         -#endif
  4261   4258   void sqlite3NestedParse(Parse*, const char*, ...);
  4262   4259   void sqlite3ExpirePreparedStatements(sqlite3*, int);
  4263   4260   int sqlite3CodeSubselect(Parse*, Expr *, int, int);
  4264   4261   void sqlite3SelectPrep(Parse*, Select*, NameContext*);
  4265   4262   void sqlite3SelectWrongNumTermsError(Parse *pParse, Select *p);
  4266   4263   int sqlite3MatchSpanName(const char*, const char*, const char*, const char*);
  4267   4264   int sqlite3ResolveExprNames(NameContext*, Expr*);

Changes to src/tokenize.c.

   541    541       }
   542    542     }
   543    543     while( IdChar(z[i]) ){ i++; }
   544    544     *tokenType = TK_ID;
   545    545     return i;
   546    546   }
   547    547   
   548         -#ifdef SQLITE_ENABLE_NORMALIZE
   549         -/*
   550         -** Return the length (in bytes) of the token that begins at z[0].
   551         -** Store the token type in *tokenType before returning.  If flags has
   552         -** SQLITE_TOKEN_NORMALIZE flag enabled, use the identifier token type
   553         -** for keywords.  Add SQLITE_TOKEN_QUOTED to flags if the token was
   554         -** actually a quoted identifier.  Add SQLITE_TOKEN_KEYWORD to flags
   555         -** if the token was recognized as a keyword; this is useful when the
   556         -** SQLITE_TOKEN_NORMALIZE flag is used, because it enables the caller
   557         -** to differentiate between a keyword being treated as an identifier
   558         -** (for normalization purposes) and an actual identifier.
   559         -*/
   560         -int sqlite3GetTokenNormalized(
   561         -  const unsigned char *z,
   562         -  int *tokenType,
   563         -  int *flags
   564         -){
   565         -  int n;
   566         -  unsigned char iClass = aiClass[*z];
   567         -  if( iClass==CC_KYWD ){
   568         -    int i;
   569         -    for(i=1; aiClass[z[i]]<=CC_KYWD; i++){}
   570         -    if( IdChar(z[i]) ){
   571         -      /* This token started out using characters that can appear in keywords,
   572         -      ** but z[i] is a character not allowed within keywords, so this must
   573         -      ** be an identifier instead */
   574         -      i++;
   575         -      while( IdChar(z[i]) ){ i++; }
   576         -      *tokenType = TK_ID;
   577         -      return i;
   578         -    }
   579         -    *tokenType = TK_ID;
   580         -    n = keywordCode((char*)z, i, tokenType);
   581         -    /* If the token is no longer considered to be an identifier, then it is a
   582         -    ** keyword of some kind.  Make the token back into an identifier and then
   583         -    ** set the SQLITE_TOKEN_KEYWORD flag.  Several non-identifier tokens are
   584         -    ** used verbatim, including IN, IS, NOT, and NULL. */
   585         -    switch( *tokenType ){
   586         -      case TK_ID: {
   587         -        /* do nothing, handled by caller */
   588         -        break;
   589         -      }
   590         -      case TK_IN:
   591         -      case TK_IS:
   592         -      case TK_NOT:
   593         -      case TK_NULL: {
   594         -        *flags |= SQLITE_TOKEN_KEYWORD;
   595         -        break;
   596         -      }
   597         -      default: {
   598         -        *tokenType = TK_ID;
   599         -        *flags |= SQLITE_TOKEN_KEYWORD;
   600         -        break;
   601         -      }
   602         -    }
   603         -  }else{
   604         -    n = sqlite3GetToken(z, tokenType);
   605         -    /* If the token is considered to be an identifier and the character class
   606         -    ** of the first character is a quote, set the SQLITE_TOKEN_QUOTED flag. */
   607         -    if( *tokenType==TK_ID && (iClass==CC_QUOTE || iClass==CC_QUOTE2) ){
   608         -      *flags |= SQLITE_TOKEN_QUOTED;
   609         -    }
   610         -  }
   611         -  return n;
   612         -}
   613         -#endif /* SQLITE_ENABLE_NORMALIZE */
   614         -
   615    548   /*
   616    549   ** Run the parser on the given SQL string.  The parser structure is
   617    550   ** passed in.  An SQLITE_ status code is returned.  If an error occurs
   618    551   ** then an attempt is made to write an error message into
   619    552   ** memory obtained from sqlite3_malloc() and to make *pzErrMsg point to that
   620    553   ** error message.
   621    554   */
................................................................................
   777    710       Table *p = pParse->pZombieTab;
   778    711       pParse->pZombieTab = p->pNextZombie;
   779    712       sqlite3DeleteTable(db, p);
   780    713     }
   781    714     assert( nErr==0 || pParse->rc!=SQLITE_OK );
   782    715     return nErr;
   783    716   }
          717  +
          718  +
          719  +#ifdef SQLITE_ENABLE_NORMALIZE
          720  +/*
          721  +** Insert a single space character into pStr if the current string
          722  +** ends with an identifier character.
          723  +*/
          724  +static void addSpaceSeparator(sqlite3_str *pStr){
          725  +  if( pStr->nChar && sqlite3IsIdChar(pStr->zText[pStr->nChar-1]) ){
          726  +    sqlite3_str_append(pStr, " ", 1);
          727  +  }
          728  +}
          729  +
          730  +/*
          731  +** Compute a normalization of the SQL given by zSql[0..nSql-1].  Return
          732  +** the normalization in space obtained from sqlite3DbMalloc().  Or return
          733  +** NULL if anything goes wrong or if zSql is NULL.
          734  +*/
          735  +char *sqlite3Normalize(
          736  +  Vdbe *pVdbe,       /* VM being reprepared */
          737  +  const char *zSql,  /* The original SQL string */
          738  +  int nSql           /* Size of the input string in bytes */
          739  +){
          740  +  sqlite3 *db;       /* The database connection */
          741  +  int i;             /* Next unread byte of zSql[] */
          742  +  int n;             /* length of current token */
          743  +  int tokenType;     /* type of current token */
          744  +  int prevType;      /* Previous non-whitespace token */
          745  +  int nParen;        /* Number of nested levels of parentheses */
          746  +  int iStartIN;      /* Offset in pStr of the start of RHS of IN operator */
          747  +  int nParenAtIN;    /* Value of nParen at start of RHS of IN operator */
          748  +  int j;             /* Bytes of normalized SQL generated so far */
          749  +  sqlite3_str *pStr; /* The normalized SQL string under construction */
          750  +
          751  +  if( zSql==0 || nSql==0 ) return 0;
          752  +  db = sqlite3VdbeDb(pVdbe);
          753  +  tokenType = -1;
          754  +  nParen = iStartIN = nParenAtIN = 0;
          755  +  pStr = sqlite3_str_new(db);
          756  +  for(i=0; i<nSql && pStr->accError==0; i+=n){
          757  +    if( tokenType!=TK_SPACE ){
          758  +      prevType = tokenType;
          759  +    }
          760  +    n = sqlite3GetToken((unsigned char*)zSql+i, &tokenType);
          761  +    if( NEVER(n<=0) ) break;
          762  +    switch( tokenType ){
          763  +      case TK_SPACE: {
          764  +        break;
          765  +      }
          766  +      case TK_NULL: {
          767  +        if( prevType==TK_IS || prevType==TK_NOT ){
          768  +          sqlite3_str_append(pStr, " NULL", 5);
          769  +          break;
          770  +        }
          771  +        /* Fall through */
          772  +      }
          773  +      case TK_STRING:
          774  +      case TK_INTEGER:
          775  +      case TK_FLOAT:
          776  +      case TK_VARIABLE:
          777  +      case TK_BLOB: {
          778  +        sqlite3_str_append(pStr, "?", 1);
          779  +        break;
          780  +      }
          781  +      case TK_LP: {
          782  +        nParen++;
          783  +        if( prevType==TK_IN ){
          784  +          iStartIN = pStr->nChar;
          785  +          nParenAtIN = nParen;
          786  +        }
          787  +        sqlite3_str_append(pStr, "(", 1);
          788  +        break;
          789  +      }
          790  +      case TK_RP: {
          791  +        if( iStartIN>0 && nParen==nParenAtIN ){
          792  +          assert( pStr->nChar>=iStartIN );
          793  +          pStr->nChar = iStartIN+1;
          794  +          sqlite3_str_append(pStr, "?,?,?", 5);
          795  +          iStartIN = 0;
          796  +        }
          797  +        nParen--;
          798  +        sqlite3_str_append(pStr, ")", 1);
          799  +        break;
          800  +      }
          801  +      case TK_ID: {
          802  +        j = pStr->nChar;
          803  +        if( sqlite3Isquote(zSql[i]) ){
          804  +          char *zId = sqlite3DbStrNDup(db, zSql+i, n);
          805  +          int nId;
          806  +          int eType = 0;
          807  +          if( zId==0 ) break;
          808  +          sqlite3Dequote(zId);
          809  +          if( zSql[i]=='"' && sqlite3VdbeUsesDoubleQuotedString(pVdbe, zId) ){
          810  +            sqlite3_str_append(pStr, "?", 1);
          811  +            sqlite3DbFree(db, zId);
          812  +            break;
          813  +          }
          814  +          nId = sqlite3Strlen30(zId);
          815  +          if( sqlite3GetToken((u8*)zId, &eType)==nId && eType==TK_ID ){
          816  +            addSpaceSeparator(pStr);
          817  +            sqlite3_str_append(pStr, zId, nId);
          818  +          }else{
          819  +            sqlite3_str_appendf(pStr, "\"%w\"", zId);
          820  +          }
          821  +          sqlite3DbFree(db, zId);
          822  +        }else{
          823  +          addSpaceSeparator(pStr);
          824  +          sqlite3_str_append(pStr, zSql+i, n);
          825  +        }
          826  +        while( j<pStr->nChar ){
          827  +          pStr->zText[j] = sqlite3Tolower(pStr->zText[j]);
          828  +          j++;
          829  +        }
          830  +        break;
          831  +      }
          832  +      default: {
          833  +        if( sqlite3IsIdChar(zSql[i]) ) addSpaceSeparator(pStr);
          834  +        j = pStr->nChar;
          835  +        sqlite3_str_append(pStr, zSql+i, n);
          836  +        while( j<pStr->nChar ){
          837  +          pStr->zText[j] = sqlite3Toupper(pStr->zText[j]);
          838  +          j++;
          839  +        }
          840  +        break;
          841  +      }
          842  +    }
          843  +  }
          844  +  return sqlite3_str_finish(pStr);
          845  +}
          846  +#endif /* SQLITE_ENABLE_NORMALIZE */

Changes to src/vdbe.h.

   249    249   int sqlite3VdbeSetColName(Vdbe*, int, int, const char *, void(*)(void*));
   250    250   void sqlite3VdbeCountChanges(Vdbe*);
   251    251   sqlite3 *sqlite3VdbeDb(Vdbe*);
   252    252   u8 sqlite3VdbePrepareFlags(Vdbe*);
   253    253   void sqlite3VdbeSetSql(Vdbe*, const char *z, int n, u8);
   254    254   #ifdef SQLITE_ENABLE_NORMALIZE
   255    255   void sqlite3VdbeAddDblquoteStr(sqlite3*,Vdbe*,const char*);
   256         -int sqlite3VdbeUsesDoubleQuotedString(sqlite3*,Vdbe*,const char*,int);
          256  +int sqlite3VdbeUsesDoubleQuotedString(Vdbe*,const char*);
   257    257   #endif
   258    258   void sqlite3VdbeSwap(Vdbe*,Vdbe*);
   259    259   VdbeOp *sqlite3VdbeTakeOpArray(Vdbe*, int*, int*);
   260    260   sqlite3_value *sqlite3VdbeGetBoundValue(Vdbe*, int, u8);
   261    261   void sqlite3VdbeSetVarmask(Vdbe*, int);
   262    262   #ifndef SQLITE_OMIT_TRACE
   263    263     char *sqlite3VdbeExpandSql(Vdbe*, const char*);

Changes to src/vdbeaux.c.

    93     93   
    94     94   #ifdef SQLITE_ENABLE_NORMALIZE
    95     95   /*
    96     96   ** zId is a zero-terminated, already-dequoted double-quoted identifier.
    97     97   ** Check to see if that identifier is really used as a string literal.
    98     98   */
    99     99   int sqlite3VdbeUsesDoubleQuotedString(
   100         -  sqlite3 *db,            /* Used for transient malloc */
   101    100     Vdbe *pVdbe,            /* The prepared statement */
   102         -  const char *zId,        /* The double-quoted identifier */
   103         -  int nId                 /* Bytes in zId, which is not zero-terminated */
          101  +  const char *zId         /* The double-quoted identifier, already dequoted */
   104    102   ){
   105         -  char *z;
   106    103     DblquoteStr *pStr;
   107    104     assert( zId!=0 );
   108         -  assert( zId[0]=='"' );
   109         -  assert( nId>=2 );
   110         -  assert( zId[nId-1]=='"' );
   111    105     if( pVdbe->pDblStr==0 ) return 0;
   112         -  z = sqlite3DbStrNDup(db, zId, nId);
   113         -  if( z==0 ) return 0;
   114         -  sqlite3Dequote(z);
   115    106     for(pStr=pVdbe->pDblStr; pStr; pStr=pStr->pNextStr){
   116         -    if( strcmp(z, pStr->z)==0 ) break;
          107  +    if( strcmp(zId, pStr->z)==0 ) return 1;
   117    108     }
   118         -  sqlite3DbFree(db, z);
   119         -  return pStr!=0;
          109  +  return 0;
   120    110   }
   121    111   #endif
   122    112   
   123    113   /*
   124    114   ** Swap all content between two VDBE structures.
   125    115   */
   126    116   void sqlite3VdbeSwap(Vdbe *pA, Vdbe *pB){