/ Check-in [d77dbb39]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add IN-operator normalizating and the SQLITE_NORMALIZE_CLI compile-time option for generating a stand-alone program.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | normalize
Files: files | file ages | folders
SHA3-256: d77dbb398afa80c1b3373c55f278491e83d5c80ddc22dbc303876bdcbc127df9
User & Date: drh 2018-01-08 19:18:27
Context
2018-01-08
19:29
Special handling of the NULL keyword. Sometimes it is a literal, and sometimes it is a keyword. check-in: db5d138e user: drh tags: normalize
19:18
Add IN-operator normalizating and the SQLITE_NORMALIZE_CLI compile-time option for generating a stand-alone program. check-in: d77dbb39 user: drh tags: normalize
16:54
First code for an auxiliary function that will normalize an SQL statement. check-in: 84814aac user: drh tags: normalize
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/misc/normalize.c.

    31     31   **   (3)  Lowercase all ASCII characters.
    32     32   **
    33     33   **   (4)  If an IN or NOT IN operator is followed by a list of 1 or more
    34     34   **        values, convert that list into "(?,?,?)".
    35     35   **
    36     36   ** The purpose of normalization is two-fold:
    37     37   **
    38         -**   (1)  Sanitize queries by removing possibly sensitive information contained
    39         -**        in literals.
           38  +**   (1)  Sanitize queries by removing potentially private or sensitive
           39  +**        information contained in literals.
    40     40   **
    41     41   **   (2)  Identify structurally identical queries by comparing their
    42     42   **        normalized forms.
           43  +**
           44  +** Command-Line Utility
           45  +** --------------------
           46  +**
           47  +** This file also contains code for a command-line utility that converts
           48  +** SQL queries in text files into their normalized forms.  To build the
           49  +** command-line program, compile this file with -DSQLITE_NORMALIZE_CLI
           50  +** and link it against the SQLite library.
    43     51   */
    44     52   #include <sqlite3.h>
    45     53   #include <string.h>
    46     54   
    47     55   /*
    48     56   ** Implementation note:
    49     57   **
    50     58   ** Much of the tokenizer logic is copied out of the tokenize.c source file
    51         -** of SQLite.  This logic could be simplified for this particular application,
           59  +** of SQLite.  That logic could be simplified for this particular application,
    52     60   ** but that would impose a risk of introducing subtle errors.  It is best to
    53     61   ** keep the code as close to the original as possible.
           62  +**
           63  +** The tokenize code is in sync with the SQLite core as of 2018-01-08.
           64  +** Any future changes to the core tokenizer might require corresponding
           65  +** adjustments to the tokenizer logic in this module.
    54     66   */
    55     67   
    56     68   
    57     69   /* Character classes for tokenizing
    58     70   **
    59     71   ** In the sqlite3GetToken() function, a switch() on aiClass[c] is implemented
    60     72   ** using a lookup table, whereas a switch() directly on c uses a binary search.
................................................................................
   568    580           break;
   569    581         }
   570    582       }
   571    583     }
   572    584     while( j>0 && z[j-1]==' ' ){ j--; }
   573    585     if( i>0 && z[j-1]!=';' ){ z[j++] = ';'; }
   574    586     z[j] = 0;
          587  +
          588  +  /* Make a second pass converting "in(...)" where the "..." is not a
          589  +  ** SELECT statement into "in(?,?,?)" */
          590  +  for(i=0; i<j; i=n){
          591  +    char *zIn = strstr(z+i, "in(");
          592  +    int nParen;
          593  +    if( zIn==0 ) break;
          594  +    n = (int)(zIn-z)+3;  /* Index of first char past "in(" */
          595  +    if( n && IdChar(zIn[-1]) ) continue;
          596  +    if( strncmp(zIn, "in(select",9)==0 && !IdChar(zIn[9]) ) continue;
          597  +    if( strncmp(zIn, "in(with",7)==0 && !IdChar(zIn[7]) ) continue;
          598  +    for(nParen=1, k=0; z[n+k]; k++){
          599  +      if( z[n+k]=='(' ) nParen++;
          600  +      if( z[n+k]==')' ){
          601  +        nParen--;
          602  +        if( nParen==0 ) break;
          603  +      }
          604  +    }
          605  +    /* k is the number of bytes in the "..." within "in(...)" */
          606  +    if( k<5 ){
          607  +      z = sqlite3_realloc64(z, j+(5-k)+1);
          608  +      if( z==0 ) return 0;
          609  +      memmove(z+n+5, z+n+k, j-(n+k));
          610  +    }else if( k>5 ){
          611  +      memmove(z+n+5, z+n+k, j-(n+k));
          612  +    }
          613  +    j = j-k+5;
          614  +    z[j] = 0;
          615  +    memcpy(z+n, "?,?,?", 5);
          616  +  }
   575    617     return z;
   576    618   }
   577    619   
   578         -#ifdef NORMALIZE_TEST
          620  +/*
          621  +** For testing purposes, or to build a stand-alone SQL normalizer program,
          622  +** compile this one source file with the -DSQLITE_NORMALIZE_CLI and link
          623  +** it against any SQLite library.  The resulting command-line program will
          624  +** run sqlite3_normalize() over the text of all files named on the command-
          625  +** line and show the result on standard output.
          626  +*/
          627  +#ifdef SQLITE_NORMALIZE_CLI
   579    628   #include <stdio.h>
   580    629   #include <stdlib.h>
   581    630   
          631  +/*
          632  +** Break zIn up into separate SQL statements and run sqlite3_normalize()
          633  +** on each one.  Print the result of each run.
          634  +*/
   582    635   static void normalizeFile(char *zIn){
   583    636     int i;
   584    637     if( zIn==0 ) return;
   585    638     for(i=0; zIn[i]; i++){
   586    639       char cSaved;
   587    640       if( zIn[i]!=';' ) continue;
   588    641       cSaved = zIn[i+1];
................................................................................
   600    653         i = -1;
   601    654       }else{
   602    655         zIn[i+1] = cSaved;
   603    656       }
   604    657     }
   605    658   }
   606    659   
          660  +/*
          661  +** The main routine for "sql_normalize".  Read files named on the
          662  +** command-line and run the text of each through sqlite3_normalize().
          663  +*/
   607    664   int main(int argc, char **argv){
   608    665     int i;
   609    666     FILE *in;
   610    667     char *zBuf = 0;
   611    668     sqlite3_int64 sz, got;
   612    669   
   613    670     for(i=1; i<argc; i++){
................................................................................
   632    689       }else{
   633    690         zBuf[got] = 0;
   634    691         normalizeFile(zBuf);
   635    692       }
   636    693     }
   637    694     sqlite3_free(zBuf);
   638    695   }
   639         -#endif /* NORMALIZE_TEST */
          696  +#endif /* SQLITE_NORMALIZE_CLI */