/ Check-in [0406ecbb]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improvements to the CSV virtual table.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 0406ecbbe75513e342040b71fdd342462222dbb3820486b5f745d7865805c00b
User & Date: drh 2018-11-16 01:42:26
Context
2018-11-16
08:36
Fix a problem with virtual table "fsdir" and some join queries. check-in: 7fffcee0 user: dan tags: trunk
01:42
Improvements to the CSV virtual table. check-in: 0406ecbb user: drh tags: trunk
2018-11-15
19:12
Fix typos in the previous check-in. check-in: 9b37bbf5 user: mistachkin tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/misc/csv.c.

    15     15   **
    16     16   ** Usage:
    17     17   **
    18     18   **    .load ./csv
    19     19   **    CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
    20     20   **    SELECT * FROM csv;
    21     21   **
    22         -** The columns are named "c1", "c2", "c3", ... by default.  But the
    23         -** application can define its own CREATE TABLE statement as an additional
    24         -** parameter.  For example:
           22  +** The columns are named "c1", "c2", "c3", ... by default.  Or the
           23  +** application can define its own CREATE TABLE statement using the
           24  +** schema= parameter, like this:
    25     25   **
    26     26   **    CREATE VIRTUAL TABLE temp.csv2 USING csv(
    27     27   **       filename = "../http.log",
    28     28   **       schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
    29     29   **    );
    30     30   **
    31     31   ** Instead of specifying a file, the text of the CSV can be loaded using
    32     32   ** the data= parameter.
    33     33   **
    34     34   ** If the columns=N parameter is supplied, then the CSV file is assumed to have
    35         -** N columns.  If the columns parameter is omitted, the CSV file is opened
    36         -** as soon as the virtual table is constructed and the first row of the CSV
    37         -** is read in order to count the tables.
           35  +** N columns.  If both the columns= and schema= parameters are omitted, then
           36  +** the number and names of the columns is determined by the first line of
           37  +** the CSV input.
    38     38   **
    39     39   ** Some extra debugging features (used for testing virtual tables) are available
    40     40   ** if this module is compiled with -DSQLITE_TEST.
    41     41   */
    42     42   #include <sqlite3ext.h>
    43     43   SQLITE_EXTENSION_INIT1
    44     44   #include <string.h>
................................................................................
   432    432      || (z[0]=='0' && z[1]==0)
   433    433     ){
   434    434       return 0;
   435    435     }
   436    436     return -1;
   437    437   }
   438    438   
          439  +/* Check to see if the string is of the form:  "TAG = BOOLEAN" or just "TAG".
          440  +** If it is, set *pValue to be the value of the boolean ("true" if there is
          441  +** not "= BOOLEAN" component) and return non-zero.  If the input string
          442  +** does not begin with TAG, return zero.
          443  +*/
          444  +static int csv_boolean_parameter(
          445  +  const char *zTag,       /* Tag we are looking for */
          446  +  int nTag,               /* Size of the tag in bytes */
          447  +  const char *z,          /* Input parameter */
          448  +  int *pValue             /* Write boolean value here */
          449  +){
          450  +  int b;
          451  +  z = csv_skip_whitespace(z);
          452  +  if( strncmp(zTag, z, nTag)!=0 ) return 0;
          453  +  z = csv_skip_whitespace(z + nTag);
          454  +  if( z[0]==0 ){
          455  +    *pValue = 1;
          456  +    return 1;
          457  +  }
          458  +  if( z[0]!='=' ) return 0;
          459  +  z = csv_skip_whitespace(z+1);
          460  +  b = csv_boolean(z);
          461  +  if( b>=0 ){
          462  +    *pValue = b;
          463  +    return 1;
          464  +  }
          465  +  return 0;
          466  +}
   439    467   
   440    468   /*
   441    469   ** Parameters:
   442    470   **    filename=FILENAME          Name of file containing CSV content
   443    471   **    data=TEXT                  Direct CSV content.
   444    472   **    schema=SCHEMA              Alternative CSV schema.
   445    473   **    header=YES|NO              First row of CSV defines the names of
................................................................................
   465    493     CsvTable *pNew = 0;        /* The CsvTable object to construct */
   466    494     int bHeader = -1;          /* header= flags.  -1 means not seen yet */
   467    495     int rc = SQLITE_OK;        /* Result code from this routine */
   468    496     int i, j;                  /* Loop counters */
   469    497   #ifdef SQLITE_TEST
   470    498     int tstFlags = 0;          /* Value for testflags=N parameter */
   471    499   #endif
          500  +  int b;                     /* Value of a boolean parameter */
   472    501     int nCol = -99;            /* Value of the columns= parameter */
   473    502     CsvReader sRdr;            /* A CSV file reader used to store an error
   474    503                                ** message and/or to count the number of columns */
   475    504     static const char *azParam[] = {
   476    505        "filename", "data", "schema", 
   477    506     };
   478    507     char *azPValue[3];         /* Parameter values */
................................................................................
   489    518       const char *zValue;
   490    519       for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
   491    520         if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
   492    521       }
   493    522       if( j<sizeof(azParam)/sizeof(azParam[0]) ){
   494    523         if( sRdr.zErr[0] ) goto csvtab_connect_error;
   495    524       }else
   496         -    if( (zValue = csv_parameter("header",6,z))!=0 ){
   497         -      int x;
          525  +    if( csv_boolean_parameter("header",6,z,&b) ){
   498    526         if( bHeader>=0 ){
   499    527           csv_errmsg(&sRdr, "more than one 'header' parameter");
   500    528           goto csvtab_connect_error;
   501    529         }
   502         -      x = csv_boolean(zValue);
   503         -      if( x==1 ){
   504         -        bHeader = 1;
   505         -      }else if( x==0 ){
   506         -        bHeader = 0;
   507         -      }else{
   508         -        csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue);
   509         -        goto csvtab_connect_error;
   510         -      }
          530  +      bHeader = b;
   511    531       }else
   512    532   #ifdef SQLITE_TEST
   513    533       if( (zValue = csv_parameter("testflags",9,z))!=0 ){
   514    534         tstFlags = (unsigned int)atoi(zValue);
   515    535       }else
   516    536   #endif
   517    537       if( (zValue = csv_parameter("columns",7,z))!=0 ){
   518    538         if( nCol>0 ){
   519    539           csv_errmsg(&sRdr, "more than one 'columns' parameter");
   520    540           goto csvtab_connect_error;
   521    541         }
   522    542         nCol = atoi(zValue);
   523    543         if( nCol<=0 ){
   524         -        csv_errmsg(&sRdr, "must have at least one column");
          544  +        csv_errmsg(&sRdr, "column= value must be positive");
   525    545           goto csvtab_connect_error;
   526    546         }
   527    547       }else
   528    548       {
   529         -      csv_errmsg(&sRdr, "unrecognized parameter '%s'", z);
          549  +      csv_errmsg(&sRdr, "bad parameter: '%s'", z);
   530    550         goto csvtab_connect_error;
   531    551       }
   532    552     }
   533    553     if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
   534         -    csv_errmsg(&sRdr, "must either filename= or data= but not both");
          554  +    csv_errmsg(&sRdr, "must specify either filename= or data= but not both");
   535    555       goto csvtab_connect_error;
   536    556     }
   537         -  if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){
          557  +
          558  +  if( (nCol<=0 || bHeader==1)
          559  +   && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA)
          560  +  ){
   538    561       goto csvtab_connect_error;
   539    562     }
   540    563     pNew = sqlite3_malloc( sizeof(*pNew) );
   541    564     *ppVtab = (sqlite3_vtab*)pNew;
   542    565     if( pNew==0 ) goto csvtab_connect_oom;
   543    566     memset(pNew, 0, sizeof(*pNew));
   544         -  if( nCol>0 ){
          567  +  if( CSV_SCHEMA==0 ){
          568  +    sqlite3_str *pStr = sqlite3_str_new(0);
          569  +    char *zSep = "";
          570  +    int iCol = 0;
          571  +    sqlite3_str_appendf(pStr, "CREATE TABLE x(");
          572  +    if( nCol<0 && bHeader<1 ){
          573  +      nCol = 0;
          574  +      do{
          575  +        csv_read_one_field(&sRdr);
          576  +        nCol++;
          577  +      }while( sRdr.cTerm==',' );
          578  +    }
          579  +    if( nCol>0 && bHeader<1 ){
          580  +      for(iCol=0; iCol<nCol; iCol++){
          581  +        sqlite3_str_appendf(pStr, "%sc%d TEXT", zSep, iCol);
          582  +        zSep = ",";
          583  +      }
          584  +    }else{
          585  +      do{
          586  +        char *z = csv_read_one_field(&sRdr);
          587  +        if( (nCol>0 && iCol<nCol) || (nCol<0 && bHeader) ){
          588  +          sqlite3_str_appendf(pStr,"%s\"%w\" TEXT", zSep, z);
          589  +          zSep = ",";
          590  +          iCol++;
          591  +        }
          592  +      }while( sRdr.cTerm==',' );
          593  +      if( nCol<0 ){
          594  +        nCol = iCol;
          595  +      }else{
          596  +        while( iCol<nCol ){
          597  +          sqlite3_str_appendf(pStr,"%sc%d TEXT", zSep, ++iCol);
          598  +          zSep = ",";
          599  +        }
          600  +      }
          601  +    }
   545    602       pNew->nCol = nCol;
   546         -  }else{
          603  +    sqlite3_str_appendf(pStr, ")");
          604  +    CSV_SCHEMA = sqlite3_str_finish(pStr);
          605  +    if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
          606  +  }else if( nCol<0 ){
   547    607       do{
   548    608         csv_read_one_field(&sRdr);
   549    609         pNew->nCol++;
   550    610       }while( sRdr.cTerm==',' );
          611  +  }else{
          612  +    pNew->nCol = nCol;
   551    613     }
   552    614     pNew->zFilename = CSV_FILENAME;  CSV_FILENAME = 0;
   553    615     pNew->zData = CSV_DATA;          CSV_DATA = 0;
   554    616   #ifdef SQLITE_TEST
   555    617     pNew->tstFlags = tstFlags;
   556    618   #endif
   557         -  pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0;
   558         -  csv_reader_reset(&sRdr);
   559         -  if( CSV_SCHEMA==0 ){
   560         -    char *zSep = "";
   561         -    CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x(");
   562         -    if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
   563         -    for(i=0; i<pNew->nCol; i++){
   564         -      CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i);
   565         -      zSep = ",";
   566         -    }
   567         -    CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA);
          619  +  if( bHeader!=1 ){
          620  +    pNew->iStart = 0;
          621  +  }else if( pNew->zData ){
          622  +    pNew->iStart = (int)sRdr.iIn;
          623  +  }else{
          624  +    pNew->iStart = ftell(sRdr.in);
   568    625     }
          626  +  csv_reader_reset(&sRdr);
   569    627     rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
   570         -  if( rc ) goto csvtab_connect_error;
          628  +  if( rc ){
          629  +    csv_errmsg(&sRdr, "bad schema: '%s' - %s", CSV_SCHEMA, sqlite3_errmsg(db));
          630  +    goto csvtab_connect_error;
          631  +  }
   571    632     for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
   572    633       sqlite3_free(azPValue[i]);
   573    634     }
   574    635     return SQLITE_OK;
   575    636   
   576    637   csvtab_connect_oom:
   577    638     rc = SQLITE_NOMEM;

Changes to test/csv01.test.

    33     33   } {9 10 11 12}
    34     34   do_execsql_test 1.1 {
    35     35     SELECT * FROM t1 WHERE c1='10';
    36     36   } {9 10 11 12}
    37     37   do_execsql_test 1.2 {
    38     38     SELECT rowid FROM t1;
    39     39   } {1 2 3 4}
           40  +
           41  +do_execsql_test 1.3 {
           42  +  DROP TABLE temp.t1;
           43  +  CREATE VIRTUAL TABLE temp.t1 USING csv(
           44  +    data=
           45  +'a,b,"mix-bloom-eel","soft opinion"
           46  +1,2,3,4
           47  +5,6,7,8
           48  +9,10,11,12
           49  +13,14,15,16
           50  +',
           51  +    header=1
           52  +  );
           53  +  SELECT * FROM t1 WHERE "soft opinion"=12;
           54  +} {9 10 11 12}
           55  +do_execsql_test 1.4 {
           56  +  SELECT name FROM pragma_table_xinfo('t1');
           57  +} {a b mix-bloom-eel {soft opinion}}
           58  +
           59  +do_execsql_test 1.5 {
           60  +  DROP TABLE temp.t1;
           61  +  CREATE VIRTUAL TABLE temp.t1 USING csv(
           62  +    data=
           63  +'a,b,"mix-bloom-eel","soft opinion"
           64  +1,2,3,4
           65  +5,6,7,8
           66  +9,10,11,12
           67  +13,14,15,16
           68  +',
           69  +    header=false
           70  +  );
           71  +  SELECT * FROM t1 WHERE c1='b';
           72  +} {a b mix-bloom-eel {soft opinion}}
           73  +do_execsql_test 1.6 {
           74  +  SELECT name FROM pragma_table_xinfo('t1');
           75  +} {c0 c1 c2 c3}
           76  +
           77  +do_execsql_test 1.7 {
           78  +  DROP TABLE temp.t1;
           79  +  CREATE VIRTUAL TABLE temp.t1 USING csv(
           80  +    data=
           81  +'a,b,"mix-bloom-eel","soft opinion"
           82  +1,2,3,4
           83  +5,6,7,8
           84  +9,10,11,12
           85  +13,14,15,16
           86  +',
           87  +    header,
           88  +    schema='CREATE TABLE x(x0,x1,x2,x3,x4)',
           89  +    columns=5
           90  +  );
           91  +  SELECT * FROM t1 WHERE x1='6';
           92  +} {5 6 7 8 {}}
           93  +do_execsql_test 1.8 {
           94  +  SELECT name FROM pragma_table_xinfo('t1');
           95  +} {x0 x1 x2 x3 x4}
           96  +
    40     97   
    41     98   do_execsql_test 2.0 {
    42     99     DROP TABLE t1;
    43    100     CREATE VIRTUAL TABLE temp.t2 USING csv(
    44    101       data=
    45    102   '1,2,3,4
    46    103   5,6,7,8
................................................................................
   103    160   9,10,11,12
   104    161   13,14,15,16',
   105    162       columns=4,
   106    163       schema=
   107    164         'CREATE TABLE t3(a,b,c,d,PRIMARY KEY(a,b)) WITHOUT ROWID',
   108    165       testflags=1
   109    166     );
   110         -} {1 {vtable constructor failed: t4}}
          167  +} {1 {bad schema: 'CREATE TABLE t3(a,b,c,d,PRIMARY KEY(a,b)) WITHOUT ROWID' - not an error}}
   111    168   
   112    169   # WITHOUT ROWID tables with a single-column PRIMARY KEY may be writable.
   113    170   do_catchsql_test 4.1 {
   114    171     DROP TABLE IF EXISTS t4;
   115    172     CREATE VIRTUAL TABLE temp.t4 USING csv_wr(
   116    173       data=
   117    174   '1,2,3,4
................................................................................
   134    191         9,10,11,12
   135    192         13,14,15,16',
   136    193         columns=4,
   137    194         schema=
   138    195         'CREATE TABLE t3(a,b,c,d) WITHOUT ROWID',
   139    196         testflags=1
   140    197         );
   141         -} {1 {vtable constructor failed: t5}}
          198  +} {1 {bad schema: 'CREATE TABLE t3(a,b,c,d) WITHOUT ROWID' - PRIMARY KEY missing on table t3}}
   142    199   
   143    200   # 2018-04-24
   144    201   # Memory leak reported on the sqlite-users mailing list by Ralf Junker.
   145    202   #
   146    203   do_catchsql_test 4.3 {
   147    204     CREATE VIRTUAL TABLE IF NOT EXISTS temp.t1
   148    205     USING csv(filename='FileDoesNotExist.csv');